coregrind/m_debuginfo/readdwarf3.c

   1 /* -*- mode: C; c-basic-offset: 3; -*- */
   2
   3 /*--------------------------------------------------------------------*/
   4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
   5 /*---                                                 readdwarf3.c ---*/
   6 /*--------------------------------------------------------------------*/
   7
   8 /*
   9    This file is part of Valgrind, a dynamic binary instrumentation
  10    framework.
  11
  12    Copyright (C) 2008-2017 OpenWorks LLP
  13       info@open-works.co.uk
  14
  15    This program is free software; you can redistribute it and/or
  16    modify it under the terms of the GNU General Public License as
  17    published by the Free Software Foundation; either version 2 of the
  18    License, or (at your option) any later version.
  19
  20    This program is distributed in the hope that it will be useful, but
  21    WITHOUT ANY WARRANTY; without even the implied warranty of
  22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  23    General Public License for more details.
  24
  25    You should have received a copy of the GNU General Public License
  26    along with this program; if not, write to the Free Software
  27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  28    02111-1307, USA.
  29
  30    The GNU General Public License is contained in the file COPYING.
  31
  32    Neither the names of the U.S. Department of Energy nor the
  33    University of California nor the names of its contributors may be
  34    used to endorse or promote products derived from this software
  35    without prior written permission.
  36 */
  37
  38 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
  39
  40 /* REFERENCE (without which this code will not make much sense):
  41
  42    DWARF Debugging Information Format, Version 3,
  43    dated 20 December 2005 (the "D3 spec").
  44
  45    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
  46    .doc (MS Word) version, but for some reason the section numbers
  47    between the Word and PDF versions differ by 1 in the first digit.
  48    All section references in this code are to the PDF version.
  49
  50    CURRENT HACKS:
  51
  52    DW_TAG_{const,volatile}_type with no DW_AT_type is allowed; it is
  53       assumed to mean "const void" or "volatile void" respectively.
  54       GDB appears to interpret them like this, anyway.
  55
  56    In many cases it is important to know the svma of a CU (the "base
  57    address of the CU", as the D3 spec calls it).  There are some
  58    situations in which the spec implies this value is unknown, but the
  59    Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
  60    merely zero when not explicitly stated.  So we too have to make
  61    that assumption.
  62
  63    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
  64    unitary_range_list() bias the resulting range list in the same way
  65    that its more general cousin, get_range_list(), does?  I don't
  66    know.
  67
  68    TODO, 2008 Feb 17:
  69
  70    get rid of cu_svma_known and document the assumed-zero svma hack.
  71
  72    ML_(sizeOfType): differentiate between zero sized types and types
  73    for which the size is unknown.  Is this important?  I don't know.
  74
  75    DW_TAG_array_types: deal with explicit sizes (currently we compute
  76    the size from the bounds and the element size, although that's
  77    fragile, if the bounds incompletely specified, or completely
  78    absent)
  79
  80    Document reason for difference (by 1) of stack preening depth in
  81    parse_var_DIE vs parse_type_DIE.
  82
  83    Don't hand to ML_(addVars), vars whose locations are entirely in
  84    registers (DW_OP_reg*).  This is merely a space-saving
  85    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
  86    expressions correctly, by failing to evaluate them and hence
  87    effectively ignoring the variable with which they are associated.
  88
  89    Deal with DW_TAG_array_types which have element size != stride
  90
  91    In some cases, the info for a variable is split between two
  92    different DIEs (generally a declarer and a definer).  We punt on
  93    these.  Could do better here.
  94
  95    The 'data_bias' argument passed to the expression evaluator
  96    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
  97    MaybeUWord, to make it clear when we do vs don't know what it is
  98    for the evaluation of an expression.  At the moment zero is passed
  99    for this parameter in the don't know case.  That's a bit fragile
 100    and obscure; using a MaybeUWord would be clearer.
 101
 102    POTENTIAL PERFORMANCE IMPROVEMENTS:
 103
 104    Currently, duplicate removal and all other queries for the type
 105    entities array is done using cuOffset-based pointing, which
 106    involves a binary search (VG_(lookupXA)) for each access.  This is
 107    wildly inefficient, although simple.  It would be better to
 108    translate all the cuOffset-based references (iow, all the "R" and
 109    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
 110    'tyents' right at the start of dedup_types(), and use direct
 111    indexing (VG_(indexXA)) wherever possible after that.
 112
 113    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
 114    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
 115    points, and possibly also make an _UNCHECKED version which skips
 116    the range checks in performance-critical situations such as this.
 117
 118    Handle interaction between read_DIE and parse_{var,type}_DIE
 119    better.  Currently read_DIE reads the entire DIE just to find where
 120    the end is (and for debug printing), so that it can later reliably
 121    move the cursor to the end regardless of what parse_{var,type}_DIE
 122    do.  This means many DIEs (most, even?) are read twice.  It would
 123    be smarter to make parse_{var,type}_DIE return a Bool indicating
 124    whether or not they advanced the DIE cursor, and only if they
 125    didn't should read_DIE itself read through the DIE.
 126
 127    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
 128    zero variables in their .vars XArray.  Rather than have an XArray
 129    with zero elements (which uses 2 malloc'd blocks), allow the .vars
 130    pointer to be NULL in this case.
 131
 132    More generally, reduce the amount of memory allocated and freed
 133    while reading Dwarf3 type/variable information.  Even modest (20MB)
 134    objects cause this module to allocate and free hundreds of
 135    thousands of small blocks, and ML_(arena_malloc) and its various
 136    groupies always show up at the top of performance profiles. */
 137
 138 #include "pub_core_basics.h"
 139 #include "pub_core_debuginfo.h"
 140 #include "pub_core_libcbase.h"
 141 #include "pub_core_libcassert.h"
 142 #include "pub_core_libcprint.h"
 143 #include "pub_core_libcsetjmp.h"   // setjmp facilities
 144 #include "pub_core_hashtable.h"
 145 #include "pub_core_options.h"
 146 #include "pub_core_tooliface.h"    /* VG_(needs) */
 147 #include "pub_core_xarray.h"
 148 #include "pub_core_wordfm.h"
 149 #include "priv_misc.h"             /* dinfo_zalloc/free */
 150 #include "priv_image.h"
 151 #include "priv_tytypes.h"
 152 #include "priv_d3basics.h"
 153 #include "priv_storage.h"
 154 #include "priv_readdwarf3.h"       /* self */
 155
 156
 157 /*------------------------------------------------------------*/
 158 /*---                                                      ---*/
 159 /*--- Basic machinery for parsing DIEs.                    ---*/
 160 /*---                                                      ---*/
 161 /*------------------------------------------------------------*/
 162
 163 #define TRACE_D3(format, args...) \
 164    if (UNLIKELY(td3)) { VG_(printf)(format, ## args); }
 165 #define TD3 (UNLIKELY(td3))
 166
 167 #define D3_INVALID_CUOFF  ((UWord)(-1UL))
 168 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
 169
 170 typedef
 171    struct {
 172       DiSlice sli;      // to which this cursor applies
 173       DiOffT  sli_next; // offset in underlying DiImage; must be >= sli.ioff
 174       void (*barf)( const HChar* ) __attribute__((noreturn));
 175       const HChar* barfstr;
 176    }
 177    Cursor;
 178
 179 static inline Bool is_sane_Cursor ( const Cursor* c ) {
 180    if (!c)                return False;
 181    if (!c->barf)          return False;
 182    if (!c->barfstr)       return False;
 183    if (!ML_(sli_is_valid)(c->sli))    return False;
 184    if (c->sli.ioff == DiOffT_INVALID) return False;
 185    if (c->sli_next < c->sli.ioff)     return False;
 186    return True;
 187 }
 188
 189 // Initialise a cursor from a DiSlice (ELF section, really) so as to
 190 // start reading at offset |sli_initial_offset| from the start of the
 191 // slice.
 192 static void init_Cursor ( /*OUT*/Cursor* c,
 193                           DiSlice sli,
 194                           ULong   sli_initial_offset,
 195                           __attribute__((noreturn)) void (*barf)(const HChar*),
 196                           const HChar* barfstr )
 197 {
 198    vg_assert(c);
 199    VG_(bzero_inline)(c, sizeof(*c));
 200    c->sli              = sli;
 201    c->sli_next         = c->sli.ioff + sli_initial_offset;
 202    c->barf             = barf;
 203    c->barfstr          = barfstr;
 204    vg_assert(is_sane_Cursor(c));
 205 }
 206
 207 static Bool is_at_end_Cursor ( const Cursor* c ) {
 208    vg_assert(is_sane_Cursor(c));
 209    return c->sli_next >= c->sli.ioff + c->sli.szB;
 210 }
 211
 212 static inline ULong get_position_of_Cursor ( const Cursor* c ) {
 213    vg_assert(is_sane_Cursor(c));
 214    return c->sli_next - c->sli.ioff;
 215 }
 216 static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
 217    c->sli_next = c->sli.ioff + pos;
 218    vg_assert(is_sane_Cursor(c));
 219 }
 220 static inline void advance_position_of_Cursor ( Cursor* c, ULong delta ) {
 221    c->sli_next += delta;
 222    vg_assert(is_sane_Cursor(c));
 223 }
 224
 225 static /*signed*/Long get_remaining_length_Cursor ( const Cursor* c ) {
 226    vg_assert(is_sane_Cursor(c));
 227    return c->sli.ioff + c->sli.szB - c->sli_next;
 228 }
 229
 230 //static void* get_address_of_Cursor ( Cursor* c ) {
 231 //   vg_assert(is_sane_Cursor(c));
 232 //   return &c->region_start_img[ c->region_next ];
 233 //}
 234
 235 static DiCursor get_DiCursor_from_Cursor ( const Cursor* c ) {
 236    return mk_DiCursor(c->sli.img, c->sli_next);
 237 }
 238
 239 /* FIXME: document assumptions on endianness for
 240    get_UShort/UInt/ULong. */
 241 static inline UChar get_UChar ( Cursor* c ) {
 242    UChar r;
 243    vg_assert(is_sane_Cursor(c));
 244    if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
 245       c->barf(c->barfstr);
 246       /*NOTREACHED*/
 247       vg_assert(0);
 248    }
 249    r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
 250    c->sli_next += sizeof(UChar);
 251    return r;
 252 }
 253 static UShort get_UShort ( Cursor* c ) {
 254    UShort r;
 255    vg_assert(is_sane_Cursor(c));
 256    if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
 257       c->barf(c->barfstr);
 258       /*NOTREACHED*/
 259       vg_assert(0);
 260    }
 261    r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
 262    c->sli_next += sizeof(UShort);
 263    return r;
 264 }
 265 static UInt get_UInt ( Cursor* c ) {
 266    UInt r;
 267    vg_assert(is_sane_Cursor(c));
 268    if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
 269       c->barf(c->barfstr);
 270       /*NOTREACHED*/
 271       vg_assert(0);
 272    }
 273    r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
 274    c->sli_next += sizeof(UInt);
 275    return r;
 276 }
 277 static ULong get_ULong ( Cursor* c ) {
 278    ULong r;
 279    vg_assert(is_sane_Cursor(c));
 280    if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
 281       c->barf(c->barfstr);
 282       /*NOTREACHED*/
 283       vg_assert(0);
 284    }
 285    r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
 286    c->sli_next += sizeof(ULong);
 287    return r;
 288 }
 289 static ULong get_ULEB128 ( Cursor* c ) {
 290    ULong result;
 291    Int   shift;
 292    UChar byte;
 293    /* unroll first iteration */
 294    byte = get_UChar( c );
 295    result = (ULong)(byte & 0x7f);
 296    if (LIKELY(!(byte & 0x80))) return result;
 297    shift = 7;
 298    /* end unroll first iteration */
 299    do {
 300       byte = get_UChar( c );
 301       result |= ((ULong)(byte & 0x7f)) << shift;
 302       shift += 7;
 303    } while (byte & 0x80);
 304    return result;
 305 }
 306 static Long get_SLEB128 ( Cursor* c ) {
 307    ULong  result = 0;
 308    Int    shift = 0;
 309    UChar  byte;
 310    do {
 311       byte = get_UChar(c);
 312       result |= ((ULong)(byte & 0x7f)) << shift;
 313       shift += 7;
 314    } while (byte & 0x80);
 315    if (shift < 64 && (byte & 0x40))
 316       result |= -(1ULL << shift);
 317    return result;
 318 }
 319
 320 /* Assume 'c' points to the start of a string.  Return a DiCursor of
 321    whatever it points at, and advance it past the terminating zero.
 322    This makes it safe for the caller to then copy the string with
 323    ML_(addStr), since (w.r.t. image overruns) the process of advancing
 324    past the terminating zero will already have "vetted" the string. */
 325 static DiCursor get_AsciiZ ( Cursor* c ) {
 326    UChar uc;
 327    DiCursor res = get_DiCursor_from_Cursor(c);
 328    do { uc = get_UChar(c); } while (uc != 0);
 329    return res;
 330 }
 331
 332 static ULong peek_ULEB128 ( Cursor* c ) {
 333    DiOffT here = c->sli_next;
 334    ULong  r    = get_ULEB128( c );
 335    c->sli_next = here;
 336    return r;
 337 }
 338 static UChar peek_UChar ( Cursor* c ) {
 339    DiOffT here = c->sli_next;
 340    UChar  r    = get_UChar( c );
 341    c->sli_next = here;
 342    return r;
 343 }
 344
 345 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
 346    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
 347 }
 348
 349 static UWord get_UWord ( Cursor* c ) {
 350    vg_assert(sizeof(UWord) == sizeof(void*));
 351    if (sizeof(UWord) == 4) return get_UInt(c);
 352    if (sizeof(UWord) == 8) return get_ULong(c);
 353    vg_assert(0);
 354 }
 355
 356 /* Read a DWARF3 'Initial Length' field */
 357 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
 358                                   Cursor* c,
 359                                   const HChar* barfMsg )
 360 {
 361    ULong w64;
 362    UInt  w32;
 363    *is64 = False;
 364    w32 = get_UInt( c );
 365    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
 366       c->barf( barfMsg );
 367    }
 368    else if (w32 == 0xFFFFFFFF) {
 369       *is64 = True;
 370       w64   = get_ULong( c );
 371    } else {
 372       *is64 = False;
 373       w64 = (ULong)w32;
 374    }
 375    return w64;
 376 }
 377
 378
 379 /*------------------------------------------------------------*/
 380 /*---                                                      ---*/
 381 /*--- "CUConst" structure                                  ---*/
 382 /*---                                                      ---*/
 383 /*------------------------------------------------------------*/
 384
 385 typedef
 386    struct _name_form {
 387       ULong at_name;  // Dwarf Attribute name
 388       ULong at_form;  // Dwarf Attribute form
 389       UInt  skip_szB; // Nr of bytes skippable from here ...
 390       UInt  next_nf;  // ... to reach this attr/form index in the g_abbv.nf
 391    } name_form;
 392 /* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
 393    Each name_form maintains how many (fixed) nr of bytes can be skipped from
 394    the beginning of this form till the next attr/form to look at.
 395    The next form to look can be:
 396        an 'interesting' attr/form to read while skipping a DIE
 397           (currently, this is only DW_AT_sibling)
 398    or
 399        a variable length form which must be read to be skipped.
 400    For a variable length form, the skip_szB will be equal to VARSZ_FORM.
 401
 402    Note: this technique could also be used to speed up the parsing
 403    of DIEs : for each parser kind, we could have the nr of bytes
 404    to skip to directly reach the interesting form(s) for the parser. */
 405
 406 typedef
 407    struct _g_abbv {
 408       struct _g_abbv *next; // read/write by hash table.
 409       UWord  abbv_code;     // key, read by hash table
 410       ULong  atag;
 411       ULong  has_children;
 412       name_form nf[0];
 413       /* Variable-length array of name/form pairs, terminated
 414          by a 0/0 pair.
 415          The skip_szB/next_nf allows to skip efficiently a DIE
 416          described by this g_abbv; */
 417     } g_abbv;
 418
 419 /* Holds information that is constant through the parsing of a
 420    Compilation Unit.  This is basically plumbed through to
 421    everywhere. */
 422 typedef
 423    struct {
 424       /* Call here if anything goes wrong */
 425       void (*barf)( const HChar* ) __attribute__((noreturn));
 426       /* Is this 64-bit DWARF ? */
 427       Bool   is_dw64;
 428       /* Which DWARF version ?  (2, 3 or 4) */
 429       UShort version;
 430       /* Length of this Compilation Unit, as stated in the
 431          .unit_length :: InitialLength field of the CU Header.
 432          However, this size (as specified by the D3 spec) does not
 433          include the size of the .unit_length field itself, which is
 434          either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
 435          can be obtained through the expression ".is_dw64 ? 12 : 4". */
 436       ULong  unit_length;
 437       /* Offset of start of this unit in .debug_info */
 438       UWord  cu_start_offset;
 439       /* SVMA for this CU.  In the D3 spec, is known as the "base
 440          address of the compilation unit (last para sec 3.1.1).
 441          Needed for (amongst things) interpretation of location-list
 442          values. */
 443       Addr   cu_svma;
 444       Bool   cu_svma_known;
 445
 446       /* The debug_abbreviations table to be used for this Unit */
 447       //UChar* debug_abbv;
 448       /* Upper bound on size thereof (an overestimate, in general) */
 449       //UWord  debug_abbv_maxszB;
 450       /* A bounded area of the image, to be used as the
 451          debug_abbreviations table tobe used for this Unit. */
 452       DiSlice debug_abbv;
 453
 454       /* Image information for various sections. */
 455       DiSlice escn_debug_str;
 456       DiSlice escn_debug_ranges;
 457       DiSlice escn_debug_loc;
 458       DiSlice escn_debug_line;
 459       DiSlice escn_debug_info;
 460       DiSlice escn_debug_types;
 461       DiSlice escn_debug_info_alt;
 462       DiSlice escn_debug_str_alt;
 463       /* How much to add to .debug_types resp. alternate .debug_info offsets
 464          in cook_die*.  */
 465       UWord  types_cuOff_bias;
 466       UWord  alt_cuOff_bias;
 467       /* --- Needed so we can add stuff to the string table. --- */
 468       struct _DebugInfo* di;
 469       /* --- a hash table of g_abbv (i.e. parsed abbreviations) --- */
 470       VgHashTable *ht_abbvs;
 471
 472       /* True if this came from .debug_types; otherwise it came from
 473          .debug_info.  */
 474       Bool is_type_unit;
 475       /* For a unit coming from .debug_types, these hold the TU's type
 476          signature and the uncooked DIE offset of the TU's signatured
 477          type.  For a unit coming from .debug_info, these are unused.  */
 478       ULong type_signature;
 479       ULong type_offset;
 480
 481       /* Signatured type hash; computed once and then shared by all
 482          CUs.  */
 483       VgHashTable *signature_types;
 484
 485       /* True if this came from alternate .debug_info; otherwise
 486          it came from normal .debug_info or .debug_types.  */
 487       Bool is_alt_info;
 488    }
 489    CUConst;
 490
 491
 492 /* Return the cooked value of DIE depending on whether CC represents a
 493    .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
 494    .debug_types and optional alternate .debug_info sections form
 495    a contiguous whole, so that DIEs coming from .debug_types are numbered
 496    starting at the end of .debug_info and DIEs coming from alternate
 497    .debug_info are numbered starting at the end of .debug_types.  */
 498 static UWord cook_die( const CUConst* cc, UWord die )
 499 {
 500    if (cc->is_type_unit)
 501       die += cc->types_cuOff_bias;
 502    else if (cc->is_alt_info)
 503       die += cc->alt_cuOff_bias;
 504    return die;
 505 }
 506
 507 /* Like cook_die, but understand that DIEs coming from a
 508    DW_FORM_ref_sig8 reference are already cooked.  Also, handle
 509    DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
 510    as reference to alternate .debug_info.  */
 511 static UWord cook_die_using_form( const CUConst *cc, UWord die, DW_FORM form)
 512 {
 513    if (form == DW_FORM_ref_sig8)
 514       return die;
 515    if (form == DW_FORM_GNU_ref_alt)
 516       return die + cc->alt_cuOff_bias;
 517    return cook_die( cc, die );
 518 }
 519
 520 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
 521    came from the .debug_types section and *ALT_FLAG to true if the DIE
 522    came from alternate .debug_info section.  */
 523 static UWord uncook_die( const CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
 524                          Bool *alt_flag )
 525 {
 526    *alt_flag = False;
 527    *type_flag = False;
 528    /* The use of escn_debug_{info,types}.szB seems safe to me even if
 529       escn_debug_{info,types} are DiSlice_INVALID (meaning the
 530       sections were not found), because DiSlice_INVALID.szB is always
 531       zero.  That said, it seems unlikely we'd ever get here if
 532       .debug_info or .debug_types were missing. */
 533    if (die >= cc->escn_debug_info.szB) {
 534       if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
 535          *alt_flag = True;
 536          die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
 537       } else {
 538          *type_flag = True;
 539          die -= cc->escn_debug_info.szB;
 540       }
 541    }
 542    return die;
 543 }
 544
 545 /*------------------------------------------------------------*/
 546 /*---                                                      ---*/
 547 /*--- Helper functions for Guarded Expressions             ---*/
 548 /*---                                                      ---*/
 549 /*------------------------------------------------------------*/
 550
 551 /* Parse the location list starting at img-offset 'debug_loc_offset'
 552    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
 553    and so I believe are correct SVMAs for the object as a whole.  This
 554    function allocates the UChar*, and the caller must deallocate it.
 555    The resulting block is in so-called Guarded-Expression format.
 556
 557    Guarded-Expression format is similar but not identical to the DWARF3
 558    location-list format.  The format of each returned block is:
 559
 560       UChar biasMe;
 561       UChar isEnd;
 562       followed by zero or more of
 563
 564       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
 565
 566    '..bytes..' is a standard DWARF3 location expression which is
 567    valid when aMin <= pc <= aMax (possibly after suitable biasing).
 568
 569    The number of bytes in '..bytes..' is nbytes.
 570
 571    The end of the sequence is marked by an isEnd == 1 value.  All
 572    previous isEnd values must be zero.
 573
 574    biasMe is 1 if the aMin/aMax fields need this DebugInfo's
 575    text_bias added before use, and 0 if this is not necessary
 576    (the GX is ready to go).
 577
 578    Hence the block can be quickly parsed and is self-describing.  Note
 579    that aMax is 1 less than the corresponding value in a DWARF3
 580    location list.  Zero length ranges, with aMax == aMin-1, are not
 581    allowed.
 582 */
 583 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
 584    it more logically belongs. */
 585
 586
 587 /* Apply a text bias to a GX. */
 588 static void bias_GX ( /*MOD*/GExpr* gx, const DebugInfo* di )
 589 {
 590    UShort nbytes;
 591    UChar* p = &gx->payload[0];
 592    UChar* pA;
 593    UChar  uc;
 594    uc = *p++; /*biasMe*/
 595    if (uc == 0)
 596       return;
 597    vg_assert(uc == 1);
 598    p[-1] = 0; /* mark it as done */
 599    while (True) {
 600       uc = *p++;
 601       if (uc == 1)
 602          break; /*isEnd*/
 603       vg_assert(uc == 0);
 604       /* t-bias aMin */
 605       pA = (UChar*)p;
 606       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
 607       p += sizeof(Addr);
 608       /* t-bias aMax */
 609       pA = (UChar*)p;
 610       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
 611       p += sizeof(Addr);
 612       /* nbytes, and actual expression */
 613       nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
 614       p += nbytes;
 615    }
 616 }
 617
 618 __attribute__((noinline))
 619 static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
 620 {
 621    SizeT  bytesReqd;
 622    GExpr* gx;
 623    UChar *p, *pstart;
 624
 625    vg_assert(sizeof(UWord) == sizeof(Addr));
 626    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
 627    bytesReqd
 628       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
 629         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
 630         + sizeof(UShort) /*nbytes*/    + (SizeT)nbytes
 631         + sizeof(UChar); /*isEnd*/
 632
 633    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
 634                            sizeof(GExpr) + bytesReqd );
 635
 636    p = pstart = &gx->payload[0];
 637
 638    p = ML_(write_UChar)(p, 0);        /*biasMe*/
 639    p = ML_(write_UChar)(p, 0);        /*!isEnd*/
 640    p = ML_(write_Addr)(p, 0);         /*aMin*/
 641    p = ML_(write_Addr)(p, ~0);        /*aMax*/
 642    p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
 643    ML_(cur_read_get)(p, block, nbytes); p += nbytes;
 644    p = ML_(write_UChar)(p, 1);        /*isEnd*/
 645
 646    vg_assert( (SizeT)(p - pstart) == bytesReqd);
 647    vg_assert( &gx->payload[bytesReqd]
 648               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
 649
 650    return gx;
 651 }
 652
 653 __attribute__((noinline))
 654 static GExpr* make_general_GX ( const CUConst* cc,
 655                                 Bool     td3,
 656                                 ULong    debug_loc_offset,
 657                                 Addr     svma_of_referencing_CU )
 658 {
 659    Addr      base;
 660    Cursor    loc;
 661    XArray*   xa; /* XArray of UChar */
 662    GExpr*    gx;
 663    Word      nbytes;
 664
 665    vg_assert(sizeof(UWord) == sizeof(Addr));
 666    if (!ML_(sli_is_valid)(cc->escn_debug_loc) || cc->escn_debug_loc.szB == 0)
 667       cc->barf("make_general_GX: .debug_loc is empty/missing");
 668
 669    init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
 670                 "Overrun whilst reading .debug_loc section(2)" );
 671    set_position_of_Cursor( &loc, debug_loc_offset );
 672
 673    TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n",
 674             debug_loc_offset, get_DiCursor_from_Cursor(&loc).ioff );
 675
 676    /* Who frees this xa?  It is freed before this fn exits. */
 677    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
 678                     ML_(dinfo_free),
 679                     sizeof(UChar) );
 680
 681    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
 682
 683    base = 0;
 684    while (True) {
 685       Bool  acquire;
 686       UWord len;
 687       /* Read a (host-)word pair.  This is something of a hack since
 688          the word size to read is really dictated by the ELF file;
 689          however, we assume we're reading a file with the same
 690          word-sizeness as the host.  Reasonably enough. */
 691       UWord w1 = get_UWord( &loc );
 692       UWord w2 = get_UWord( &loc );
 693
 694       TRACE_D3("   %08lx %08lx\n", w1, w2);
 695       if (w1 == 0 && w2 == 0)
 696          break; /* end of list */
 697
 698       if (w1 == -1UL) {
 699          /* new value for 'base' */
 700          base = w2;
 701          continue;
 702       }
 703
 704       /* else a location expression follows */
 705       /* else enumerate [w1+base, w2+base) */
 706       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
 707          (sec 2.17.2) */
 708       if (w1 > w2) {
 709          TRACE_D3("negative range is for .debug_loc expr at "
 710                   "file offset %llu\n",
 711                   debug_loc_offset);
 712          cc->barf( "negative range in .debug_loc section" );
 713       }
 714
 715       /* ignore zero length ranges */
 716       acquire = w1 < w2;
 717       len     = (UWord)get_UShort( &loc );
 718
 719       if (acquire) {
 720          UWord  w;
 721          UShort s;
 722          UChar  c;
 723          c = 0; /* !isEnd*/
 724          VG_(addBytesToXA)( xa, &c, sizeof(c) );
 725          w = w1    + base + svma_of_referencing_CU;
 726          VG_(addBytesToXA)( xa, &w, sizeof(w) );
 727          w = w2 -1 + base + svma_of_referencing_CU;
 728          VG_(addBytesToXA)( xa, &w, sizeof(w) );
 729          s = (UShort)len;
 730          VG_(addBytesToXA)( xa, &s, sizeof(s) );
 731       }
 732
 733       while (len > 0) {
 734          UChar byte = get_UChar( &loc );
 735          TRACE_D3("%02x", (UInt)byte);
 736          if (acquire)
 737             VG_(addBytesToXA)( xa, &byte, 1 );
 738          len--;
 739       }
 740       TRACE_D3("\n");
 741    }
 742
 743    { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
 744
 745    nbytes = VG_(sizeXA)( xa );
 746    vg_assert(nbytes >= 1);
 747
 748    gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
 749    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
 750    vg_assert( &gx->payload[nbytes]
 751               == ((UChar*)gx) + sizeof(GExpr) + nbytes );
 752
 753    VG_(deleteXA)( xa );
 754
 755    TRACE_D3("}\n");
 756
 757    return gx;
 758 }
 759
 760
 761 /*------------------------------------------------------------*/
 762 /*---                                                      ---*/
 763 /*--- Helper functions for range lists and CU headers      ---*/
 764 /*---                                                      ---*/
 765 /*------------------------------------------------------------*/
 766
 767 /* Denotes an address range.  Both aMin and aMax are included in the
 768    range; hence a complete range is (0, ~0) and an empty range is any
 769    (X, X-1) for X > 0.*/
 770 typedef
 771    struct { Addr aMin; Addr aMax; }
 772    AddrRange;
 773
 774
 775 /* Generate an arbitrary structural total ordering on
 776    XArray* of AddrRange. */
 777 static Word cmp__XArrays_of_AddrRange ( const XArray* rngs1,
 778                                         const XArray* rngs2 )
 779 {
 780    Word n1, n2, i;
 781    vg_assert(rngs1 && rngs2);
 782    n1 = VG_(sizeXA)( rngs1 );
 783    n2 = VG_(sizeXA)( rngs2 );
 784    if (n1 < n2) return -1;
 785    if (n1 > n2) return 1;
 786    for (i = 0; i < n1; i++) {
 787       AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
 788       AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
 789       if (rng1->aMin < rng2->aMin) return -1;
 790       if (rng1->aMin > rng2->aMin) return 1;
 791       if (rng1->aMax < rng2->aMax) return -1;
 792       if (rng1->aMax > rng2->aMax) return 1;
 793    }
 794    return 0;
 795 }
 796
 797
 798 __attribute__((noinline))
 799 static XArray* /* of AddrRange */ empty_range_list ( void )
 800 {
 801    XArray* xa; /* XArray of AddrRange */
 802    /* Who frees this xa?  varstack_preen() does. */
 803    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
 804                     ML_(dinfo_free),
 805                     sizeof(AddrRange) );
 806    return xa;
 807 }
 808
 809
 810 __attribute__((noinline))
 811 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
 812 {
 813    XArray*   xa;
 814    AddrRange pair;
 815    vg_assert(aMin <= aMax);
 816    /* Who frees this xa?  varstack_preen() does. */
 817    xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
 818                     ML_(dinfo_free),
 819                     sizeof(AddrRange) );
 820    pair.aMin = aMin;
 821    pair.aMax = aMax;
 822    VG_(addToXA)( xa, &pair );
 823    return xa;
 824 }
 825
 826
 827 /* Enumerate the address ranges starting at img-offset
 828    'debug_ranges_offset' in .debug_ranges.  Results are biased with
 829    'svma_of_referencing_CU' and so I believe are correct SVMAs for the
 830    object as a whole.  This function allocates the XArray, and the
 831    caller must deallocate it. */
 832 __attribute__((noinline))
 833 static XArray* /* of AddrRange */
 834 get_range_list ( const CUConst* cc,
 835                  Bool     td3,
 836                  UWord    debug_ranges_offset,
 837                  Addr     svma_of_referencing_CU )
 838 {
 839    Addr      base;
 840    Cursor    ranges;
 841    XArray*   xa; /* XArray of AddrRange */
 842    AddrRange pair;
 843
 844    if (!ML_(sli_is_valid)(cc->escn_debug_ranges)
 845        || cc->escn_debug_ranges.szB == 0)
 846       cc->barf("get_range_list: .debug_ranges is empty/missing");
 847
 848    init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf,
 849                 "Overrun whilst reading .debug_ranges section(2)" );
 850    set_position_of_Cursor( &ranges, debug_ranges_offset );
 851
 852    /* Who frees this xa?  varstack_preen() does. */
 853    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
 854                     sizeof(AddrRange) );
 855    base = 0;
 856    while (True) {
 857       /* Read a (host-)word pair.  This is something of a hack since
 858          the word size to read is really dictated by the ELF file;
 859          however, we assume we're reading a file with the same
 860          word-sizeness as the host.  Reasonably enough. */
 861       UWord w1 = get_UWord( &ranges );
 862       UWord w2 = get_UWord( &ranges );
 863
 864       if (w1 == 0 && w2 == 0)
 865          break; /* end of list. */
 866
 867       if (w1 == -1UL) {
 868          /* new value for 'base' */
 869          base = w2;
 870          continue;
 871       }
 872
 873       /* else enumerate [w1+base, w2+base) */
 874       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
 875          (sec 2.17.2) */
 876       if (w1 > w2)
 877          cc->barf( "negative range in .debug_ranges section" );
 878       if (w1 < w2) {
 879          pair.aMin = w1     + base + svma_of_referencing_CU;
 880          pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
 881          vg_assert(pair.aMin <= pair.aMax);
 882          VG_(addToXA)( xa, &pair );
 883       }
 884    }
 885    return xa;
 886 }
 887
 888 #define VARSZ_FORM 0xffffffff
 889 static UInt get_Form_szB (const CUConst* cc, DW_FORM form );
 890
 891 /* Initialises the hash table of abbreviations.
 892    We do a single scan of the abbv slice to parse and
 893    build all abbreviations, for the following reasons:
 894      * all or most abbreviations will be needed in any case
 895        (at least for var-info reading).
 896      * re-reading each time an abbreviation causes a lot of calls
 897        to get_ULEB128.
 898      * a CU should not have many abbreviations. */
 899 static void init_ht_abbvs (CUConst* cc,
 900                            Bool td3)
 901 {
 902    Cursor c;
 903    g_abbv *ta; // temporary abbreviation, reallocated if needed.
 904    UInt ta_nf_maxE; // max nr of pairs in ta.nf[], doubled when reallocated.
 905    UInt ta_nf_n;    // nr of pairs in ta->nf that are initialised.
 906    g_abbv *ht_ta; // abbv to insert in hash table.
 907    Int i;
 908
 909    #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
 910
 911    ta_nf_maxE = 10; // starting with enough for 9 pairs+terminating pair.
 912    ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE));
 913    cc->ht_abbvs = VG_(HT_construct) ("di.readdwarf3.ht_abbvs");
 914
 915    init_Cursor( &c, cc->debug_abbv, 0, cc->barf,
 916                "Overrun whilst parsing .debug_abbrev section(2)" );
 917    while (True) {
 918       ta->abbv_code = get_ULEB128( &c );
 919       if (ta->abbv_code == 0) break; /* end of the table */
 920
 921       ta->atag = get_ULEB128( &c );
 922       ta->has_children = get_UChar( &c );
 923       ta_nf_n = 0;
 924       while (True) {
 925          if (ta_nf_n >= ta_nf_maxE) {
 926             g_abbv *old_ta = ta;
 927             ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf",
 928                                     SZ_G_ABBV(2 * ta_nf_maxE));
 929             ta_nf_maxE = 2 * ta_nf_maxE;
 930             VG_(memcpy) (ta, old_ta, SZ_G_ABBV(ta_nf_n));
 931             ML_(dinfo_free) (old_ta);
 932          }
 933          ta->nf[ta_nf_n].at_name = get_ULEB128( &c );
 934          ta->nf[ta_nf_n].at_form = get_ULEB128( &c );
 935          if (ta->nf[ta_nf_n].at_name == 0 && ta->nf[ta_nf_n].at_form == 0) {
 936             ta_nf_n++;
 937             break;
 938          }
 939         ta_nf_n++;
 940       }
 941
 942       // Initialises the skip_szB/next_nf elements : an element at position
 943       // i must contain the sum of its own size + the sizes of all elements
 944       // following i till either the next variable size element, the next
 945       // sibling element or the end of the DIE.
 946       ta->nf[ta_nf_n - 1].skip_szB = 0;
 947       ta->nf[ta_nf_n - 1].next_nf = 0;
 948       for (i = ta_nf_n - 2; i >= 0; i--) {
 949          const UInt form_szB = get_Form_szB (cc, (DW_FORM)ta->nf[i].at_form);
 950
 951          if (ta->nf[i+1].at_name == DW_AT_sibling
 952              || ta->nf[i+1].skip_szB == VARSZ_FORM) {
 953             ta->nf[i].skip_szB = form_szB;
 954             ta->nf[i].next_nf  = i+1;
 955          } else if (form_szB == VARSZ_FORM) {
 956             ta->nf[i].skip_szB = form_szB;
 957             ta->nf[i].next_nf  = i+1;
 958          } else {
 959             ta->nf[i].skip_szB = ta->nf[i+1].skip_szB + form_szB;
 960             ta->nf[i].next_nf  = ta->nf[i+1].next_nf;
 961          }
 962       }
 963
 964       ht_ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n));
 965       VG_(memcpy) (ht_ta, ta, SZ_G_ABBV(ta_nf_n));
 966       VG_(HT_add_node) ( cc->ht_abbvs, ht_ta );
 967       if (TD3) {
 968          TRACE_D3("  Adding abbv_code %lu TAG  %s [%s] nf %u ",
 969                   ht_ta->abbv_code, ML_(pp_DW_TAG)(ht_ta->atag),
 970                   ML_(pp_DW_children)(ht_ta->has_children),
 971                   ta_nf_n);
 972          TRACE_D3("  ");
 973          for (i = 0; i < ta_nf_n; i++)
 974             TRACE_D3("[%u,%u] ", ta->nf[i].skip_szB, ta->nf[i].next_nf);
 975          TRACE_D3("\n");
 976       }
 977    }
 978
 979    ML_(dinfo_free) (ta);
 980    #undef SZ_G_ABBV
 981 }
 982
 983 static g_abbv* get_abbv (const CUConst* cc, ULong abbv_code)
 984 {
 985    g_abbv *abbv;
 986
 987    abbv = VG_(HT_lookup) (cc->ht_abbvs, abbv_code);
 988    if (!abbv)
 989       cc->barf ("abbv_code not found in ht_abbvs table");
 990    return abbv;
 991 }
 992
 993 /* Free the memory allocated in CUConst. */
 994 static void clear_CUConst (CUConst* cc)
 995 {
 996    VG_(HT_destruct) ( cc->ht_abbvs, ML_(dinfo_free));
 997    cc->ht_abbvs = NULL;
 998 }
 999
1000 /* Parse the Compilation Unit header indicated at 'c' and
1001    initialise 'cc' accordingly. */
1002 static __attribute__((noinline))
1003 void parse_CU_Header ( /*OUT*/CUConst* cc,
1004                        Bool td3,
1005                        Cursor* c,
1006                        DiSlice escn_debug_abbv,
1007                        Bool type_unit,
1008                        Bool alt_info )
1009 {
1010    UChar  address_size;
1011    ULong  debug_abbrev_offset;
1012
1013    VG_(memset)(cc, 0, sizeof(*cc));
1014    vg_assert(c && c->barf);
1015    cc->barf = c->barf;
1016
1017    /* initial_length field */
1018    cc->unit_length
1019       = get_Initial_Length( &cc->is_dw64, c,
1020            "parse_CU_Header: invalid initial-length field" );
1021
1022    TRACE_D3("   Length:        %llu\n", cc->unit_length );
1023
1024    /* version */
1025    cc->version = get_UShort( c );
1026    if (cc->version != 2 && cc->version != 3 && cc->version != 4)
1027       cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
1028    TRACE_D3("   Version:       %d\n", (Int)cc->version );
1029
1030    /* debug_abbrev_offset */
1031    debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1032    if (debug_abbrev_offset >= escn_debug_abbv.szB)
1033       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
1034    TRACE_D3("   Abbrev Offset: %llu\n", debug_abbrev_offset );
1035
1036    /* address size.  If this isn't equal to the host word size, just
1037       give up.  This makes it safe to assume elsewhere that
1038       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
1039       word. */
1040    address_size = get_UChar( c );
1041    if (address_size != sizeof(void*))
1042       cc->barf( "parse_CU_Header: invalid address_size" );
1043    TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
1044
1045    cc->is_type_unit = type_unit;
1046    cc->is_alt_info = alt_info;
1047
1048    if (type_unit) {
1049       cc->type_signature = get_ULong( c );
1050       cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1051    }
1052
1053    /* Set up cc->debug_abbv to point to the relevant table for this
1054       CU.  Set its .szB so that at least we can't read off the end of
1055       the debug_abbrev section -- potentially (and quite likely) too
1056       big, if this isn't the last table in the section, but at least
1057       it's safe.
1058
1059       This amounts to taking debug_abbv_escn and moving the start
1060       position along by debug_abbrev_offset bytes, hence forming a
1061       smaller DiSlice which has the same end point.  Since we checked
1062       just above that debug_abbrev_offset is less than the size of
1063       debug_abbv_escn, this should leave us with a nonempty slice. */
1064    vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
1065    cc->debug_abbv      = escn_debug_abbv;
1066    cc->debug_abbv.ioff += debug_abbrev_offset;
1067    cc->debug_abbv.szB  -= debug_abbrev_offset;
1068
1069    init_ht_abbvs(cc, td3);
1070 }
1071
1072 /* This represents a single signatured type.  It maps a type signature
1073    (a ULong) to a cooked DIE offset.  Objects of this type are stored
1074    in the type signature hash table.  */
1075 typedef
1076    struct D3SignatureType {
1077       struct D3SignatureType *next;
1078       UWord data;
1079       ULong type_signature;
1080       UWord die;
1081    }
1082    D3SignatureType;
1083
1084 /* Record a signatured type in the hash table.  */
1085 static void record_signatured_type ( VgHashTable *tab,
1086                                      ULong type_signature,
1087                                      UWord die )
1088 {
1089    D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
1090                                                  sizeof(D3SignatureType) );
1091    dstype->data = (UWord) type_signature;
1092    dstype->type_signature = type_signature;
1093    dstype->die = die;
1094    VG_(HT_add_node) ( tab, dstype );
1095 }
1096
1097 /* Given a type signature hash table and a type signature, return the
1098    cooked DIE offset of the type.  If the type cannot be found, call
1099    BARF.  */
1100 static UWord lookup_signatured_type ( const VgHashTable *tab,
1101                                       ULong type_signature,
1102                                       void (*barf)( const HChar* ) __attribute__((noreturn)) )
1103 {
1104    D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
1105    /* This may be unwarranted chumminess with the hash table
1106       implementation.  */
1107    while ( dstype != NULL && dstype->type_signature != type_signature)
1108       dstype = dstype->next;
1109    if (dstype == NULL) {
1110       barf("lookup_signatured_type: could not find signatured type");
1111       /*NOTREACHED*/
1112       vg_assert(0);
1113    }
1114    return dstype->die;
1115 }
1116
1117
1118 /* Represents Form data.  If szB is 1/2/4/8 then the result is in the
1119    lowest 1/2/4/8 bytes of u.val.  If szB is zero or negative then the
1120    result is an image section beginning at u.cur and with size -szB.
1121    No other szB values are allowed. */
1122 typedef
1123    struct {
1124       Long szB; // 1, 2, 4, 8 or non-positive values only.
1125       union { ULong val; DiCursor cur; } u;
1126    }
1127    FormContents;
1128
1129 /* From 'c', get the Form data into 'cts'.  Either it gets a 1/2/4/8
1130    byte scalar value, or (a reference to) zero or more bytes starting
1131    at a DiCursor.*/
1132 static
1133 void get_Form_contents ( /*OUT*/FormContents* cts,
1134                          const CUConst* cc, Cursor* c,
1135                          Bool td3, DW_FORM form )
1136 {
1137    VG_(bzero_inline)(cts, sizeof(*cts));
1138    // !!! keep switch in sync with get_Form_szB. The nr of characters read below
1139    // must be computed similarly in get_Form_szB.
1140    // The consistency is verified in trace_DIE.
1141    switch (form) {
1142       case DW_FORM_data1:
1143          cts->u.val = (ULong)(UChar)get_UChar(c);
1144          cts->szB   = 1;
1145          TRACE_D3("%u", (UInt)cts->u.val);
1146          break;
1147       case DW_FORM_data2:
1148          cts->u.val = (ULong)(UShort)get_UShort(c);
1149          cts->szB   = 2;
1150          TRACE_D3("%u", (UInt)cts->u.val);
1151          break;
1152       case DW_FORM_data4:
1153          cts->u.val = (ULong)(UInt)get_UInt(c);
1154          cts->szB   = 4;
1155          TRACE_D3("%u", (UInt)cts->u.val);
1156          break;
1157       case DW_FORM_data8:
1158          cts->u.val = get_ULong(c);
1159          cts->szB   = 8;
1160          TRACE_D3("%llu", cts->u.val);
1161          break;
1162       case DW_FORM_sec_offset:
1163          cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
1164          cts->szB   = cc->is_dw64 ? 8 : 4;
1165          TRACE_D3("%llu", cts->u.val);
1166          break;
1167       case DW_FORM_sdata:
1168          cts->u.val = (ULong)(Long)get_SLEB128(c);
1169          cts->szB   = 8;
1170          TRACE_D3("%llu", cts->u.val);
1171          break;
1172       case DW_FORM_udata:
1173          cts->u.val = (ULong)(Long)get_ULEB128(c);
1174          cts->szB   = 8;
1175          TRACE_D3("%llu", cts->u.val);
1176          break;
1177       case DW_FORM_addr:
1178          /* note, this is a hack.  DW_FORM_addr is defined as getting
1179             a word the size of the target machine as defined by the
1180             address_size field in the CU Header.  However,
1181             parse_CU_Header() rejects all inputs except those for
1182             which address_size == sizeof(Word), hence we can just
1183             treat it as a (host) Word.  */
1184          cts->u.val = (ULong)(UWord)get_UWord(c);
1185          cts->szB   = sizeof(UWord);
1186          TRACE_D3("0x%lx", (UWord)cts->u.val);
1187          break;
1188
1189       case DW_FORM_ref_addr:
1190          /* We make the same word-size assumption as DW_FORM_addr. */
1191          /* What does this really mean?  From D3 Sec 7.5.4,
1192             description of "reference", it would appear to reference
1193             some other DIE, by specifying the offset from the
1194             beginning of a .debug_info section.  The D3 spec mentions
1195             that this might be in some other shared object and
1196             executable.  But I don't see how the name of the other
1197             object/exe is specified.
1198
1199             At least for the DW_FORM_ref_addrs created by icc11, the
1200             references seem to be within the same object/executable.
1201             So for the moment we merely range-check, to see that they
1202             actually do specify a plausible offset within this
1203             object's .debug_info, and return the value unchanged.
1204
1205             In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1206             DWARF 3 and later, it is offset-sized.
1207          */
1208          if (cc->version == 2) {
1209             cts->u.val = (ULong)(UWord)get_UWord(c);
1210             cts->szB   = sizeof(UWord);
1211          } else {
1212             cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1213             cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1214          }
1215          TRACE_D3("0x%lx", (UWord)cts->u.val);
1216          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
1217          if (/* the following is surely impossible, but ... */
1218              !ML_(sli_is_valid)(cc->escn_debug_info)
1219              || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
1220             /* Hmm.  Offset is nonsensical for this object's .debug_info
1221                section.  Be safe and reject it. */
1222             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1223                      "outside .debug_info");
1224          }
1225          break;
1226
1227       case DW_FORM_strp: {
1228          /* this is an offset into .debug_str */
1229          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1230          if (!ML_(sli_is_valid)(cc->escn_debug_str)
1231              || uw >= cc->escn_debug_str.szB)
1232             cc->barf("get_Form_contents: DW_FORM_strp "
1233                      "points outside .debug_str");
1234          /* FIXME: check the entire string lies inside debug_str,
1235             not just the first byte of it. */
1236          DiCursor str
1237             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw );
1238          if (TD3) {
1239             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
1240             TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp);
1241             ML_(dinfo_free)(tmp);
1242          }
1243          cts->u.cur = str;
1244          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1245          break;
1246       }
1247       case DW_FORM_string: {
1248          DiCursor str = get_AsciiZ(c);
1249          if (TD3) {
1250             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
1251             TRACE_D3("%s", tmp);
1252             ML_(dinfo_free)(tmp);
1253          }
1254          cts->u.cur = str;
1255          /* strlen is safe because get_AsciiZ already 'vetted' the
1256             entire string */
1257          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1258          break;
1259       }
1260       case DW_FORM_ref1: {
1261          UChar u8   = get_UChar(c);
1262          UWord res  = cc->cu_start_offset + (UWord)u8;
1263          cts->u.val = (ULong)res;
1264          cts->szB   = sizeof(UWord);
1265          TRACE_D3("<%lx>", res);
1266          break;
1267       }
1268       case DW_FORM_ref2: {
1269          UShort u16 = get_UShort(c);
1270          UWord  res = cc->cu_start_offset + (UWord)u16;
1271          cts->u.val = (ULong)res;
1272          cts->szB   = sizeof(UWord);
1273          TRACE_D3("<%lx>", res);
1274          break;
1275       }
1276       case DW_FORM_ref4: {
1277          UInt  u32  = get_UInt(c);
1278          UWord res  = cc->cu_start_offset + (UWord)u32;
1279          cts->u.val = (ULong)res;
1280          cts->szB   = sizeof(UWord);
1281          TRACE_D3("<%lx>", res);
1282          break;
1283       }
1284       case DW_FORM_ref8: {
1285          ULong u64  = get_ULong(c);
1286          UWord res  = cc->cu_start_offset + (UWord)u64;
1287          cts->u.val = (ULong)res;
1288          cts->szB   = sizeof(UWord);
1289          TRACE_D3("<%lx>", res);
1290          break;
1291       }
1292       case DW_FORM_ref_udata: {
1293          ULong u64  = get_ULEB128(c);
1294          UWord res  = cc->cu_start_offset + (UWord)u64;
1295          cts->u.val = (ULong)res;
1296          cts->szB   = sizeof(UWord);
1297          TRACE_D3("<%lx>", res);
1298          break;
1299       }
1300       case DW_FORM_flag: {
1301          UChar u8 = get_UChar(c);
1302          TRACE_D3("%u", (UInt)u8);
1303          cts->u.val = (ULong)u8;
1304          cts->szB   = 1;
1305          break;
1306       }
1307       case DW_FORM_flag_present:
1308          TRACE_D3("1");
1309          cts->u.val = 1;
1310          cts->szB   = 1;
1311          break;
1312       case DW_FORM_block1: {
1313          ULong    u64b;
1314          ULong    u64   = (ULong)get_UChar(c);
1315          DiCursor block = get_DiCursor_from_Cursor(c);
1316          TRACE_D3("%llu byte block: ", u64);
1317          for (u64b = u64; u64b > 0; u64b--) {
1318             UChar u8 = get_UChar(c);
1319             TRACE_D3("%x ", (UInt)u8);
1320          }
1321          cts->u.cur = block;
1322          cts->szB   = - (Long)u64;
1323          break;
1324       }
1325       case DW_FORM_block2: {
1326          ULong    u64b;
1327          ULong    u64   = (ULong)get_UShort(c);
1328          DiCursor block = get_DiCursor_from_Cursor(c);
1329          TRACE_D3("%llu byte block: ", u64);
1330          for (u64b = u64; u64b > 0; u64b--) {
1331             UChar u8 = get_UChar(c);
1332             TRACE_D3("%x ", (UInt)u8);
1333          }
1334          cts->u.cur = block;
1335          cts->szB   = - (Long)u64;
1336          break;
1337       }
1338       case DW_FORM_block4: {
1339          ULong    u64b;
1340          ULong    u64   = (ULong)get_UInt(c);
1341          DiCursor block = get_DiCursor_from_Cursor(c);
1342          TRACE_D3("%llu byte block: ", u64);
1343          for (u64b = u64; u64b > 0; u64b--) {
1344             UChar u8 = get_UChar(c);
1345             TRACE_D3("%x ", (UInt)u8);
1346          }
1347          cts->u.cur = block;
1348          cts->szB   = - (Long)u64;
1349          break;
1350       }
1351       case DW_FORM_exprloc:
1352       case DW_FORM_block: {
1353          ULong    u64b;
1354          ULong    u64   = (ULong)get_ULEB128(c);
1355          DiCursor block = get_DiCursor_from_Cursor(c);
1356          TRACE_D3("%llu byte block: ", u64);
1357          for (u64b = u64; u64b > 0; u64b--) {
1358             UChar u8 = get_UChar(c);
1359             TRACE_D3("%x ", (UInt)u8);
1360          }
1361          cts->u.cur = block;
1362          cts->szB   = - (Long)u64;
1363          break;
1364       }
1365       case DW_FORM_ref_sig8: {
1366          ULong  u64b;
1367          ULong  signature = get_ULong (c);
1368          ULong  work = signature;
1369          TRACE_D3("8 byte signature: ");
1370          for (u64b = 8; u64b > 0; u64b--) {
1371             UChar u8 = work & 0xff;
1372             TRACE_D3("%x ", (UInt)u8);
1373             work >>= 8;
1374          }
1375
1376          /* cc->signature_types is only built/initialised when
1377             VG_(clo_read_var_info) is set. In this case,
1378             the DW_FORM_ref_sig8 can be looked up.
1379             But we can also arrive here when only reading inline info
1380             and VG_(clo_trace_symtab) is set. In such a case,
1381             we cannot lookup the DW_FORM_ref_sig8, we rather assign
1382             a dummy value. This is a kludge, but otherwise,
1383             the 'dwarf inline info reader' tracing would have to
1384             do type processing/reading. It is better to avoid
1385             adding significant 'real' processing only due to tracing. */
1386          if (VG_(clo_read_var_info)) {
1387             /* Due to the way that the hash table is constructed, the
1388                resulting DIE offset here is already "cooked".  See
1389                cook_die_using_form.  */
1390             cts->u.val = lookup_signatured_type (cc->signature_types, signature,
1391                                                  c->barf);
1392          } else {
1393             vg_assert (td3);
1394             vg_assert (VG_(clo_read_inline_info));
1395             TRACE_D3("<not dereferencing signature type>");
1396             cts->u.val = 0; /* Assign a dummy/rubbish value */
1397          }
1398          cts->szB   = sizeof(UWord);
1399          break;
1400       }
1401       case DW_FORM_indirect:
1402          get_Form_contents (cts, cc, c, td3, (DW_FORM)get_ULEB128(c));
1403          return;
1404
1405       case DW_FORM_GNU_ref_alt:
1406          cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1407          cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1408          TRACE_D3("0x%lx", (UWord)cts->u.val);
1409          if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
1410          if (/* the following is surely impossible, but ... */
1411              !ML_(sli_is_valid)(cc->escn_debug_info_alt))
1412             cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
1413                      "but no alternate .debug_info");
1414          else if (cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
1415             /* Hmm.  Offset is nonsensical for this object's .debug_info
1416                section.  Be safe and reject it. */
1417             cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
1418                      "outside alternate .debug_info");
1419          }
1420          break;
1421
1422       case DW_FORM_GNU_strp_alt: {
1423          /* this is an offset into alternate .debug_str */
1424          SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1425          if (!ML_(sli_is_valid)(cc->escn_debug_str_alt))
1426             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
1427                      "but no alternate .debug_str");
1428          else if (uw >= cc->escn_debug_str_alt.szB)
1429             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1430                      "points outside alternate .debug_str");
1431          /* FIXME: check the entire string lies inside debug_str,
1432             not just the first byte of it. */
1433          DiCursor str
1434             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
1435          if (TD3) {
1436             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
1437             TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
1438             ML_(dinfo_free)(tmp);
1439          }
1440          cts->u.cur = str;
1441          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1442          break;
1443       }
1444
1445       default:
1446          VG_(printf)(
1447             "get_Form_contents: unhandled %u (%s) at <%llx>\n",
1448             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1449          c->barf("get_Form_contents: unhandled DW_FORM");
1450    }
1451 }
1452
1453 static inline UInt sizeof_Dwarfish_UWord (Bool is_dw64)
1454 {
1455    if (is_dw64)
1456       return sizeof(ULong);
1457    else
1458       return sizeof(UInt);
1459 }
1460
1461 #define VARSZ_FORM 0xffffffff
1462 /* If the form is a fixed length form, return the nr of bytes for this form.
1463    If the form is a variable length form, return VARSZ_FORM. */
1464 static
1465 UInt get_Form_szB (const CUConst* cc, DW_FORM form )
1466 {
1467    // !!! keep switch in sync with get_Form_contents : the nr of bytes
1468    // read from a cursor by get_Form_contents must be returned by
1469    // the below switch.
1470    // The consistency is verified in trace_DIE.
1471    switch (form) {
1472       case DW_FORM_data1: return 1;
1473       case DW_FORM_data2: return 2;
1474       case DW_FORM_data4: return 4;
1475       case DW_FORM_data8: return 8;
1476       case DW_FORM_sec_offset:
1477          if (cc->is_dw64)
1478             return 8;
1479          else
1480             return 4;
1481       case DW_FORM_sdata:
1482          return VARSZ_FORM;
1483       case DW_FORM_udata:
1484          return VARSZ_FORM;
1485       case DW_FORM_addr: // See hack in get_Form_contents
1486          return sizeof(UWord);
1487       case DW_FORM_ref_addr: // See hack in get_Form_contents
1488          if (cc->version == 2)
1489             return sizeof(UWord);
1490          else
1491             return sizeof_Dwarfish_UWord (cc->is_dw64);
1492       case DW_FORM_strp:
1493          return sizeof_Dwarfish_UWord (cc->is_dw64);
1494       case DW_FORM_string:
1495          return VARSZ_FORM;
1496       case DW_FORM_ref1:
1497          return 1;
1498       case DW_FORM_ref2:
1499          return 2;
1500       case DW_FORM_ref4:
1501          return 4;
1502       case DW_FORM_ref8:
1503          return 8;
1504       case DW_FORM_ref_udata:
1505          return VARSZ_FORM;
1506       case DW_FORM_flag:
1507          return 1;
1508       case DW_FORM_flag_present:
1509          return 0; // !!! special case, no data.
1510       case DW_FORM_block1:
1511          return VARSZ_FORM;
1512       case DW_FORM_block2:
1513          return VARSZ_FORM;
1514       case DW_FORM_block4:
1515          return VARSZ_FORM;
1516       case DW_FORM_exprloc:
1517       case DW_FORM_block:
1518          return VARSZ_FORM;
1519       case DW_FORM_ref_sig8:
1520          return 8;
1521       case DW_FORM_indirect:
1522          return VARSZ_FORM;
1523       case DW_FORM_GNU_ref_alt:
1524          return sizeof_Dwarfish_UWord(cc->is_dw64);
1525       case DW_FORM_GNU_strp_alt:
1526          return sizeof_Dwarfish_UWord(cc->is_dw64);
1527       default:
1528          VG_(printf)(
1529             "get_Form_szB: unhandled %u (%s)\n",
1530             form, ML_(pp_DW_FORM)(form));
1531          cc->barf("get_Form_contents: unhandled DW_FORM");
1532    }
1533 }
1534
1535 /* Skip a DIE as described by abbv.
1536    If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
1537 static
1538 void skip_DIE (UWord  *sibling,
1539                Cursor* c_die,
1540                const g_abbv *abbv,
1541                const CUConst* cc)
1542 {
1543    UInt nf_i;
1544    FormContents cts;
1545    nf_i = 0;
1546    while (True) {
1547       if (abbv->nf[nf_i].at_name == DW_AT_sibling) {
1548          get_Form_contents( &cts, cc, c_die, False /*td3*/,
1549                             (DW_FORM)abbv->nf[nf_i].at_form );
1550          if ( cts.szB > 0 )
1551             *sibling = cts.u.val;
1552          nf_i++;
1553       } else if (abbv->nf[nf_i].skip_szB == VARSZ_FORM) {
1554          get_Form_contents( &cts, cc, c_die, False /*td3*/,
1555                             (DW_FORM)abbv->nf[nf_i].at_form );
1556          nf_i++;
1557       } else {
1558          advance_position_of_Cursor (c_die, (ULong)abbv->nf[nf_i].skip_szB);
1559          nf_i = abbv->nf[nf_i].next_nf;
1560       }
1561       if (nf_i == 0)
1562          break;
1563    }
1564 }
1565
1566
1567 /*------------------------------------------------------------*/
1568 /*---                                                      ---*/
1569 /*--- Parsing of variable-related DIEs                     ---*/
1570 /*---                                                      ---*/
1571 /*------------------------------------------------------------*/
1572
1573 typedef
1574    struct _TempVar {
1575       const HChar*  name; /* in DebugInfo's .strpool */
1576       /* Represent ranges economically.  nRanges is the number of
1577          ranges.  Cases:
1578          0: .rngOneMin .rngOneMax .manyRanges are all zero
1579          1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1580          2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1581          This is merely an optimisation to avoid having to allocate
1582          and free the XArray in the common (98%) of cases where there
1583          is zero or one address ranges. */
1584       UWord   nRanges;
1585       Addr    rngOneMin;
1586       Addr    rngOneMax;
1587       XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
1588       /* Do not free .rngMany, since many TempVars will have the same
1589          value.  Instead the associated storage is to be freed by
1590          deleting 'rangetree', which stores a single copy of each
1591          range. */
1592       /* --- */
1593       Int     level;
1594       UWord   typeR; /* a cuOff */
1595       GExpr*  gexpr; /* for this variable */
1596       GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
1597                         any */
1598       UInt    fndn_ix; /* declaring file/dirname index in fndnpool, or 0 */
1599       Int     fLine; /* declaring file line number, or zero */
1600       /* offset in .debug_info, so that abstract instances can be
1601          found to satisfy references from concrete instances. */
1602       UWord   dioff;
1603       UWord   absOri; /* so the absOri fields refer to dioff fields
1604                          in some other, related TempVar. */
1605    }
1606    TempVar;
1607
1608 typedef
1609    struct {
1610       /* Contains the range stack: a stack of address ranges, one
1611          stack entry for each nested scope.
1612
1613          Some scope entries are created by function definitions
1614          (DW_AT_subprogram), and for those, we also note the GExpr
1615          derived from its DW_AT_frame_base attribute, if any.
1616          Consequently it should be possible to find, for any
1617          variable's DIE, the GExpr for the containing function's
1618          DW_AT_frame_base by scanning back through the stack to find
1619          the nearest entry associated with a function.  This somewhat
1620          elaborate scheme is provided so as to make it possible to
1621          obtain the correct DW_AT_frame_base expression even in the
1622          presence of nested functions (or to be more precise, in the
1623          presence of nested DW_AT_subprogram DIEs).
1624       */
1625       Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
1626                      stack */
1627       Int     stack_size;
1628       XArray **ranges; /* XArray of AddrRange */
1629       Int     *level;  /* D3 DIE levels */
1630       Bool    *isFunc; /* from DW_AT_subprogram? */
1631       GExpr  **fbGX;   /* if isFunc, contains the FB expr, else NULL */
1632       /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
1633          integer index to the index in di->fndnpool. */
1634       XArray* /* of UInt* */ fndn_ix_Table;
1635    }
1636    D3VarParser;
1637
1638 /* Completely initialise a variable parser object */
1639 static void
1640 var_parser_init ( D3VarParser *parser )
1641 {
1642    parser->sp = -1;
1643    parser->stack_size = 0;
1644    parser->ranges = NULL;
1645    parser->level  = NULL;
1646    parser->isFunc = NULL;
1647    parser->fbGX = NULL;
1648    parser->fndn_ix_Table = NULL;
1649 }
1650
1651 /* Release any memory hanging off a variable parser object */
1652 static void
1653 var_parser_release ( D3VarParser *parser )
1654 {
1655    ML_(dinfo_free)( parser->ranges );
1656    ML_(dinfo_free)( parser->level );
1657    ML_(dinfo_free)( parser->isFunc );
1658    ML_(dinfo_free)( parser->fbGX );
1659 }
1660
1661 static void varstack_show ( const D3VarParser* parser, const HChar* str )
1662 {
1663    Word i, j;
1664    VG_(printf)("  varstack (%s) {\n", str);
1665    for (i = 0; i <= parser->sp; i++) {
1666       XArray* xa = parser->ranges[i];
1667       vg_assert(xa);
1668       VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
1669       if (parser->isFunc[i]) {
1670          VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1671       } else {
1672          vg_assert(parser->fbGX[i] == NULL);
1673       }
1674       VG_(printf)(": ");
1675       if (VG_(sizeXA)( xa ) == 0) {
1676          VG_(printf)("** empty PC range array **");
1677       } else {
1678          for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1679             AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1680             vg_assert(range);
1681             VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1682          }
1683       }
1684       VG_(printf)("\n");
1685    }
1686    VG_(printf)("  }\n");
1687 }
1688
1689 /* Remove from the stack, all entries with .level > 'level' */
1690 static
1691 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1692 {
1693    Bool changed = False;
1694    vg_assert(parser->sp < parser->stack_size);
1695    while (True) {
1696       vg_assert(parser->sp >= -1);
1697       if (parser->sp == -1) break;
1698       if (parser->level[parser->sp] <= level) break;
1699       if (0)
1700          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1701       vg_assert(parser->ranges[parser->sp]);
1702       /* Who allocated this xa?  get_range_list() or
1703          unitary_range_list(). */
1704       VG_(deleteXA)( parser->ranges[parser->sp] );
1705       parser->sp--;
1706       changed = True;
1707    }
1708    if (changed && td3)
1709       varstack_show( parser, "after preen" );
1710 }
1711
1712 static void varstack_push ( const CUConst* cc,
1713                             D3VarParser* parser,
1714                             Bool td3,
1715                             XArray* ranges, Int level,
1716                             Bool    isFunc, GExpr* fbGX ) {
1717    if (0)
1718    TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
1719             parser->sp+1, level, ranges);
1720
1721    /* First we need to zap everything >= 'level', as we are about to
1722       replace any previous entry at 'level', so .. */
1723    varstack_preen(parser, /*td3*/False, level-1);
1724
1725    vg_assert(parser->sp >= -1);
1726    vg_assert(parser->sp < parser->stack_size);
1727    if (parser->sp == parser->stack_size - 1) {
1728       parser->stack_size += 48;
1729       parser->ranges =
1730          ML_(dinfo_realloc)("di.readdwarf3.varpush.1", parser->ranges,
1731                             parser->stack_size * sizeof parser->ranges[0]);
1732       parser->level =
1733          ML_(dinfo_realloc)("di.readdwarf3.varpush.2", parser->level,
1734                             parser->stack_size * sizeof parser->level[0]);
1735       parser->isFunc =
1736          ML_(dinfo_realloc)("di.readdwarf3.varpush.3", parser->isFunc,
1737                             parser->stack_size * sizeof parser->isFunc[0]);
1738       parser->fbGX =
1739          ML_(dinfo_realloc)("di.readdwarf3.varpush.4", parser->fbGX,
1740                             parser->stack_size * sizeof parser->fbGX[0]);
1741    }
1742    if (parser->sp >= 0)
1743       vg_assert(parser->level[parser->sp] < level);
1744    parser->sp++;
1745    vg_assert(ranges != NULL);
1746    if (!isFunc) vg_assert(fbGX == NULL);
1747    parser->ranges[parser->sp] = ranges;
1748    parser->level[parser->sp]  = level;
1749    parser->isFunc[parser->sp] = isFunc;
1750    parser->fbGX[parser->sp]   = fbGX;
1751    if (TD3)
1752       varstack_show( parser, "after push" );
1753 }
1754
1755
1756 /* cts is derived from a DW_AT_location and so refers either to a
1757    location expression or to a location list.  Figure out which, and
1758    in both cases bundle the expression or location list into a
1759    so-called GExpr (guarded expression). */
1760 __attribute__((noinline))
1761 static GExpr* get_GX ( const CUConst* cc, Bool td3, const FormContents* cts )
1762 {
1763    GExpr* gexpr = NULL;
1764    if (cts->szB < 0) {
1765       /* represents a non-empty in-line location expression, and
1766          cts->u.cur points at the image bytes */
1767       gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
1768    }
1769    else
1770    if (cts->szB > 0) {
1771       /* represents a location list.  cts->u.val is the offset of it
1772          in .debug_loc. */
1773       if (!cc->cu_svma_known)
1774          cc->barf("get_GX: location list, but CU svma is unknown");
1775       gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
1776    }
1777    else {
1778       vg_assert(0); /* else caller is bogus */
1779    }
1780    return gexpr;
1781 }
1782
1783 /* Returns an xarray* of directory names (indexed by the dwarf dirname
1784    integer).
1785    If 'compdir' is NULL, entry [0] will be set to "."
1786    otherwise entry [0] is set to compdir.
1787    Entry [0] basically means "the current directory of the compilation",
1788    whatever that means, according to the DWARF3 spec.
1789    FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
1790 static
1791 XArray* read_dirname_xa (DebugInfo* di, const HChar *compdir,
1792                          Cursor *c,
1793                          Bool td3 )
1794 {
1795    XArray*        dirname_xa;   /* xarray of HChar* dirname */
1796    const HChar*   dirname;
1797    UInt           compdir_len;
1798
1799    dirname_xa = VG_(newXA) (ML_(dinfo_zalloc), "di.rdxa.1", ML_(dinfo_free),
1800                             sizeof(HChar*) );
1801
1802    if (compdir == NULL) {
1803       dirname = ".";
1804       compdir_len = 1;
1805    } else {
1806       dirname = compdir;
1807       compdir_len = VG_(strlen)(compdir);
1808    }
1809    VG_(addToXA) (dirname_xa, &dirname);
1810
1811    TRACE_D3(" The Directory Table%s\n",
1812             peek_UChar(c) == 0 ? " is empty." : ":" );
1813
1814    while (peek_UChar(c) != 0) {
1815
1816       DiCursor cur = get_AsciiZ(c);
1817       HChar* data_str = ML_(cur_read_strdup)( cur, "dirname_xa.1" );
1818       TRACE_D3("  %s\n", data_str);
1819
1820       /* If data_str[0] is '/', then 'data' is an absolute path and we
1821          don't mess with it.  Otherwise, construct the
1822          path 'compdir' ++ "/" ++ 'data'. */
1823
1824       if (data_str[0] != '/'
1825           /* not an absolute path */
1826           && compdir
1827           /* actually got something sensible for compdir */
1828           && compdir_len)
1829       {
1830          SizeT  len = compdir_len + 1 + VG_(strlen)(data_str);
1831          HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);
1832
1833          VG_(strcpy)(buf, compdir);
1834          VG_(strcat)(buf, "/");
1835          VG_(strcat)(buf, data_str);
1836
1837          dirname = ML_(addStr)(di, buf, len);
1838          VG_(addToXA) (dirname_xa, &dirname);
1839          if (0) VG_(printf)("rel path  %s\n", buf);
1840          ML_(dinfo_free)(buf);
1841       } else {
1842          /* just use 'data'. */
1843          dirname = ML_(addStr)(di,data_str,-1);
1844          VG_(addToXA) (dirname_xa, &dirname);
1845          if (0) VG_(printf)("abs path  %s\n", data_str);
1846       }
1847
1848       ML_(dinfo_free)(data_str);
1849    }
1850
1851    TRACE_D3 ("\n");
1852
1853    if (get_UChar (c) != 0) {
1854       ML_(symerr)(NULL, True,
1855                   "could not get NUL at end of DWARF directory table");
1856       VG_(deleteXA)(dirname_xa);
1857       return NULL;
1858    }
1859
1860    return dirname_xa;
1861 }
1862
1863 static
1864 void read_filename_table( /*MOD*/XArray* /* of UInt* */ fndn_ix_Table,
1865                           const HChar* compdir,
1866                           const CUConst* cc, ULong debug_line_offset,
1867                           Bool td3 )
1868 {
1869    Bool   is_dw64;
1870    Cursor c;
1871    Word   i;
1872    UShort version;
1873    UChar  opcode_base;
1874    const HChar* str;
1875    XArray* dirname_xa;   /* xarray of HChar* dirname */
1876    ULong  dir_xa_ix;     /* Index in dirname_xa, as read from dwarf info. */
1877    const HChar* dirname;
1878    UInt   fndn_ix;
1879
1880    vg_assert(fndn_ix_Table && cc && cc->barf);
1881    if (!ML_(sli_is_valid)(cc->escn_debug_line)
1882        || cc->escn_debug_line.szB <= debug_line_offset) {
1883       cc->barf("read_filename_table: .debug_line is missing?");
1884    }
1885
1886    init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
1887                 "Overrun whilst reading .debug_line section(1)" );
1888
1889    /* unit_length = */
1890    get_Initial_Length( &is_dw64, &c,
1891                        "read_filename_table: invalid initial-length field" );
1892    version = get_UShort( &c );
1893    if (version != 2 && version != 3 && version != 4)
1894      cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1895               "is currently supported.");
1896    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1897    /*minimum_instruction_length = */ get_UChar( &c );
1898    if (version >= 4)
1899       /*maximum_operations_per_insn = */ get_UChar( &c );
1900    /*default_is_stmt            = */ get_UChar( &c );
1901    /*line_base                  = (Char)*/ get_UChar( &c );
1902    /*line_range                 = */ get_UChar( &c );
1903    opcode_base                = get_UChar( &c );
1904    /* skip over "standard_opcode_lengths" */
1905    for (i = 1; i < (Word)opcode_base; i++)
1906      (void)get_UChar( &c );
1907
1908    dirname_xa = read_dirname_xa(cc->di, compdir, &c, td3);
1909
1910    /* Read and record the file names table */
1911    vg_assert( VG_(sizeXA)( fndn_ix_Table ) == 0 );
1912    /* Add a dummy index-zero entry.  DWARF3 numbers its files
1913       from 1, for some reason. */
1914    fndn_ix = ML_(addFnDn) ( cc->di, "<unknown_file>", NULL );
1915    VG_(addToXA)( fndn_ix_Table, &fndn_ix );
1916    while (peek_UChar(&c) != 0) {
1917       DiCursor cur = get_AsciiZ(&c);
1918       str = ML_(addStrFromCursor)( cc->di, cur );
1919       dir_xa_ix = get_ULEB128( &c );
1920       if (dirname_xa != NULL
1921           && dir_xa_ix >= 0 && dir_xa_ix < VG_(sizeXA) (dirname_xa))
1922          dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
1923       else
1924          dirname = NULL;
1925       fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
1926       TRACE_D3("  read_filename_table: %ld fndn_ix %u %s %s\n",
1927                VG_(sizeXA)(fndn_ix_Table), fndn_ix,
1928                dirname, str);
1929       VG_(addToXA)( fndn_ix_Table, &fndn_ix );
1930       (void)get_ULEB128( &c ); /* skip last mod time */
1931       (void)get_ULEB128( &c ); /* file size */
1932    }
1933    /* We're done!  The rest of it is not interesting. */
1934    if (dirname_xa != NULL)
1935       VG_(deleteXA)(dirname_xa);
1936 }
1937
1938 /* setup_cu_svma to be called when a cu is found at level 0,
1939    to establish the cu_svma. */
1940 static void setup_cu_svma(CUConst* cc, Bool have_lo, Addr ip_lo, Bool td3)
1941 {
1942    Addr cu_svma;
1943    /* We have potentially more than one type of parser parsing the
1944       dwarf information. At least currently, each parser establishes
1945       the cu_svma. So, in case cu_svma_known, we check that the same
1946       result is obtained by the 2nd parsing of the cu.
1947
1948       Alternatively, we could reset cu_svma_known after each parsing
1949       and then check that we only see a single DW_TAG_compile_unit DIE
1950       at level 0, DWARF3 only allows exactly one top level DIE per
1951       CU. */
1952
1953    if (have_lo)
1954       cu_svma = ip_lo;
1955    else {
1956       /* Now, it may be that this DIE doesn't tell us the CU's
1957          SVMA, by way of not having a DW_AT_low_pc.  That's OK --
1958          the CU doesn't *have* to have its SVMA specified.
1959
1960          But as per last para D3 spec sec 3.1.1 ("Normal and
1961          Partial Compilation Unit Entries", "If the base address
1962          (viz, the SVMA) is undefined, then any DWARF entry of
1963          structure defined interms of the base address of that
1964          compilation unit is not valid.".  So that means, if whilst
1965          processing the children of this top level DIE (or their
1966          children, etc) we see a DW_AT_range, and cu_svma_known is
1967          False, then the DIE that contains it is (per the spec)
1968          invalid, and we can legitimately stop and complain. */
1969       /* .. whereas The Reality is, simply assume the SVMA is zero
1970          if it isn't specified. */
1971       cu_svma = 0;
1972    }
1973
1974    if (cc->cu_svma_known) {
1975       vg_assert (cu_svma == cc->cu_svma);
1976    } else {
1977       cc->cu_svma_known = True;
1978       cc->cu_svma = cu_svma;
1979       if (0)
1980          TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc->cu_svma);
1981    }
1982 }
1983
1984 static void trace_DIE(
1985    DW_TAG dtag,
1986    UWord posn,
1987    Int level,
1988    UWord saved_die_c_offset,
1989    const g_abbv *abbv,
1990    const CUConst* cc)
1991 {
1992    Cursor c;
1993    FormContents cts;
1994    UWord sibling = 0;
1995    UInt nf_i;
1996    Bool  debug_types_flag;
1997    Bool  alt_flag;
1998    Cursor check_skip;
1999    UWord check_sibling = 0;
2000
2001    posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
2002    init_Cursor (&c,
2003                 debug_types_flag ? cc->escn_debug_types :
2004                 alt_flag ? cc->escn_debug_info_alt : cc->escn_debug_info,
2005                 saved_die_c_offset, cc->barf,
2006                 "Overrun trace_DIE");
2007    check_skip = c;
2008    VG_(printf)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
2009                level, posn, (ULong) abbv->abbv_code, ML_(pp_DW_TAG)( dtag ),
2010                debug_types_flag ? " (in .debug_types)" : "",
2011                alt_flag ? " (in alternate .debug_info)" : "");
2012    nf_i = 0;
2013    while (True) {
2014       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2015       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2016       nf_i++;
2017       if (attr == 0 && form == 0) break;
2018       VG_(printf)("     %-18s: ", ML_(pp_DW_AT)(attr));
2019       /* Get the form contents, so as to print them */
2020       get_Form_contents( &cts, cc, &c, True, form );
2021       if (attr == DW_AT_sibling && cts.szB > 0) {
2022          sibling = cts.u.val;
2023       }
2024       VG_(printf)("\t\n");
2025    }
2026
2027    /* Verify that skipping a DIE gives the same displacement as
2028       tracing (i.e. reading) a DIE. If there is an inconsistency in
2029       the nr of bytes read by get_Form_contents and get_Form_szB, this
2030       should be detected by the below. Using --trace-symtab=yes
2031       --read-var-info=yes will ensure all DIEs are systematically
2032       verified. */
2033    skip_DIE (&check_sibling, &check_skip, abbv, cc);
2034    vg_assert (check_sibling == sibling);
2035    vg_assert (get_position_of_Cursor (&check_skip)
2036               == get_position_of_Cursor (&c));
2037 }
2038
2039 __attribute__((noreturn))
2040 static void dump_bad_die_and_barf(
2041    const HChar *whichparser,
2042    DW_TAG dtag,
2043    UWord posn,
2044    Int level,
2045    Cursor* c_die,
2046    UWord saved_die_c_offset,
2047    const g_abbv *abbv,
2048    const CUConst* cc)
2049 {
2050    trace_DIE (dtag, posn, level, saved_die_c_offset, abbv, cc);
2051    VG_(printf)("%s:\n", whichparser);
2052    cc->barf("confused by the above DIE");
2053 }
2054
2055 __attribute__((noinline))
2056 static void bad_DIE_confusion(int linenr)
2057 {
2058    VG_(printf)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr);
2059 }
2060 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
2061
2062 __attribute__((noinline))
2063 static void parse_var_DIE (
2064    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
2065    /*MOD*/XArray* /* of TempVar* */ tempvars,
2066    /*MOD*/XArray* /* of GExpr* */ gexprs,
2067    /*MOD*/D3VarParser* parser,
2068    DW_TAG dtag,
2069    UWord posn,
2070    Int level,
2071    Cursor* c_die,
2072    const g_abbv *abbv,
2073    CUConst* cc,
2074    Bool td3
2075 )
2076 {
2077    FormContents cts;
2078    UInt nf_i;
2079
2080    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2081
2082    varstack_preen( parser, td3, level-1 );
2083
2084    if (dtag == DW_TAG_compile_unit
2085        || dtag == DW_TAG_type_unit
2086        || dtag == DW_TAG_partial_unit) {
2087       Bool have_lo    = False;
2088       Bool have_hi1   = False;
2089       Bool hiIsRelative = False;
2090       Bool have_range = False;
2091       Addr ip_lo    = 0;
2092       Addr ip_hi1   = 0;
2093       Addr rangeoff = 0;
2094       const HChar *compdir = NULL;
2095       nf_i = 0;
2096       while (True) {
2097          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2098          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2099          nf_i++;
2100          if (attr == 0 && form == 0) break;
2101          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2102          if (attr == DW_AT_low_pc && cts.szB > 0) {
2103             ip_lo   = cts.u.val;
2104             have_lo = True;
2105          }
2106          if (attr == DW_AT_high_pc && cts.szB > 0) {
2107             ip_hi1   = cts.u.val;
2108             have_hi1 = True;
2109             if (form != DW_FORM_addr)
2110                hiIsRelative = True;
2111          }
2112          if (attr == DW_AT_ranges && cts.szB > 0) {
2113             rangeoff   = cts.u.val;
2114             have_range = True;
2115          }
2116          if (attr == DW_AT_comp_dir) {
2117             if (cts.szB >= 0)
2118                cc->barf("parse_var_DIE compdir: expecting indirect string");
2119             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2120                                                "parse_var_DIE.compdir" );
2121             compdir = ML_(addStr)(cc->di, str, -1);
2122             ML_(dinfo_free) (str);
2123          }
2124          if (attr == DW_AT_stmt_list && cts.szB > 0) {
2125             read_filename_table( parser->fndn_ix_Table, compdir,
2126                                  cc, cts.u.val, td3 );
2127          }
2128       }
2129       if (have_lo && have_hi1 && hiIsRelative)
2130          ip_hi1 += ip_lo;
2131
2132       /* Now, does this give us an opportunity to find this
2133          CU's svma? */
2134       if (level == 0)
2135          setup_cu_svma(cc, have_lo, ip_lo, td3);
2136
2137       /* Do we have something that looks sane? */
2138       if (have_lo && have_hi1 && (!have_range)) {
2139          if (ip_lo < ip_hi1)
2140             varstack_push( cc, parser, td3,
2141                            unitary_range_list(ip_lo, ip_hi1 - 1),
2142                            level,
2143                            False/*isFunc*/, NULL/*fbGX*/ );
2144          else if (ip_lo == 0 && ip_hi1 == 0)
2145             /* CU has no code, presumably?
2146                Such situations have been encountered for code
2147                compiled with -ffunction-sections -fdata-sections
2148                and linked with --gc-sections. Completely
2149                eliminated CU gives such 0 lo/hi pc. Similarly
2150                to a CU which has no lo/hi/range pc, we push
2151                an empty range list. */
2152             varstack_push( cc, parser, td3,
2153                            empty_range_list(),
2154                            level,
2155                            False/*isFunc*/, NULL/*fbGX*/ );
2156       } else
2157       if ((!have_lo) && (!have_hi1) && have_range) {
2158          varstack_push( cc, parser, td3,
2159                         get_range_list( cc, td3,
2160                                         rangeoff, cc->cu_svma ),
2161                         level,
2162                         False/*isFunc*/, NULL/*fbGX*/ );
2163       } else
2164       if ((!have_lo) && (!have_hi1) && (!have_range)) {
2165          /* CU has no code, presumably? */
2166          varstack_push( cc, parser, td3,
2167                         empty_range_list(),
2168                         level,
2169                         False/*isFunc*/, NULL/*fbGX*/ );
2170       } else
2171       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
2172          /* broken DIE created by gcc-4.3.X ?  Ignore the
2173             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
2174             instead. */
2175          varstack_push( cc, parser, td3,
2176                         get_range_list( cc, td3,
2177                                         rangeoff, cc->cu_svma ),
2178                         level,
2179                         False/*isFunc*/, NULL/*fbGX*/ );
2180       } else {
2181          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
2182                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
2183          goto_bad_DIE;
2184       }
2185    }
2186
2187    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
2188       Bool   have_lo    = False;
2189       Bool   have_hi1   = False;
2190       Bool   have_range = False;
2191       Bool   hiIsRelative = False;
2192       Addr   ip_lo      = 0;
2193       Addr   ip_hi1     = 0;
2194       Addr   rangeoff   = 0;
2195       Bool   isFunc     = dtag == DW_TAG_subprogram;
2196       GExpr* fbGX       = NULL;
2197       nf_i = 0;
2198       while (True) {
2199          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2200          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2201          nf_i++;
2202          if (attr == 0 && form == 0) break;
2203          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2204          if (attr == DW_AT_low_pc && cts.szB > 0) {
2205             ip_lo   = cts.u.val;
2206             have_lo = True;
2207          }
2208          if (attr == DW_AT_high_pc && cts.szB > 0) {
2209             ip_hi1   = cts.u.val;
2210             have_hi1 = True;
2211             if (form != DW_FORM_addr)
2212                hiIsRelative = True;
2213          }
2214          if (attr == DW_AT_ranges && cts.szB > 0) {
2215             rangeoff   = cts.u.val;
2216             have_range = True;
2217          }
2218          if (isFunc
2219              && attr == DW_AT_frame_base
2220              && cts.szB != 0 /* either scalar or nonempty block */) {
2221             fbGX = get_GX( cc, False/*td3*/, &cts );
2222             vg_assert(fbGX);
2223             VG_(addToXA)(gexprs, &fbGX);
2224          }
2225       }
2226       if (have_lo && have_hi1 && hiIsRelative)
2227          ip_hi1 += ip_lo;
2228       /* Do we have something that looks sane? */
2229       if (dtag == DW_TAG_subprogram
2230           && (!have_lo) && (!have_hi1) && (!have_range)) {
2231          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
2232             representing a subroutine declaration that is not also a
2233             definition does not have code address or range
2234             attributes." */
2235       } else
2236       if (dtag == DW_TAG_lexical_block
2237           && (!have_lo) && (!have_hi1) && (!have_range)) {
2238          /* I believe this is legit, and means the lexical block
2239             contains no insns (whatever that might mean).  Ignore. */
2240       } else
2241       if (have_lo && have_hi1 && (!have_range)) {
2242          /* This scope supplies just a single address range. */
2243          if (ip_lo < ip_hi1)
2244             varstack_push( cc, parser, td3,
2245                            unitary_range_list(ip_lo, ip_hi1 - 1),
2246                            level, isFunc, fbGX );
2247       } else
2248       if ((!have_lo) && (!have_hi1) && have_range) {
2249          /* This scope supplies multiple address ranges via the use of
2250             a range list. */
2251          varstack_push( cc, parser, td3,
2252                         get_range_list( cc, td3,
2253                                         rangeoff, cc->cu_svma ),
2254                         level, isFunc, fbGX );
2255       } else
2256       if (have_lo && (!have_hi1) && (!have_range)) {
2257          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
2258             Entries) says fairly clearly that a scope must have either
2259             _range or (_low_pc and _high_pc). */
2260          /* The spec is a bit ambiguous though.  Perhaps a single byte
2261             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
2262          /* This case is here because icc9 produced this:
2263          <2><13bd>: DW_TAG_lexical_block
2264             DW_AT_decl_line   : 5229
2265             DW_AT_decl_column : 37
2266             DW_AT_decl_file   : 1
2267             DW_AT_low_pc      : 0x401b03
2268          */
2269          /* Ignore (seems safe than pushing a single byte range) */
2270       } else
2271          goto_bad_DIE;
2272    }
2273
2274    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
2275       const  HChar* name = NULL;
2276       UWord  typeR       = D3_INVALID_CUOFF;
2277       Bool   global      = False;
2278       GExpr* gexpr       = NULL;
2279       Int    n_attrs     = 0;
2280       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
2281       Int    lineNo      = 0;
2282       UInt   fndn_ix     = 0;
2283       nf_i = 0;
2284       while (True) {
2285          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2286          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2287          nf_i++;
2288          if (attr == 0 && form == 0) break;
2289          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2290          n_attrs++;
2291          if (attr == DW_AT_name && cts.szB < 0) {
2292             name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
2293          }
2294          if (attr == DW_AT_location
2295              && cts.szB != 0 /* either scalar or nonempty block */) {
2296             gexpr = get_GX( cc, False/*td3*/, &cts );
2297             vg_assert(gexpr);
2298             VG_(addToXA)(gexprs, &gexpr);
2299          }
2300          if (attr == DW_AT_type && cts.szB > 0) {
2301             typeR = cook_die_using_form( cc, cts.u.val, form );
2302          }
2303          if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
2304             global = True;
2305          }
2306          if (attr == DW_AT_abstract_origin && cts.szB > 0) {
2307             abs_ori = (UWord)cts.u.val;
2308          }
2309          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
2310             /*declaration = True;*/
2311          }
2312          if (attr == DW_AT_decl_line && cts.szB > 0) {
2313             lineNo = (Int)cts.u.val;
2314          }
2315          if (attr == DW_AT_decl_file && cts.szB > 0) {
2316             Int ftabIx = (Int)cts.u.val;
2317             if (ftabIx >= 1
2318                 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
2319                fndn_ix = *(UInt*)VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
2320             }
2321             if (0) VG_(printf)("XXX filename fndn_ix = %u %s\n", fndn_ix,
2322                                ML_(fndn_ix2filename) (cc->di, fndn_ix));
2323          }
2324       }
2325       if (!global && dtag == DW_TAG_variable && level == 1) {
2326          /* Case of a static variable. It is better to declare
2327             it global as the variable is not really related to
2328             a PC range, as its address can be used by program
2329             counters outside of the ranges where it is visible . */
2330          global = True;
2331       }
2332
2333       /* We'll collect it under if one of the following three
2334          conditions holds:
2335          (1) has location and type    -> completed
2336          (2) has type only            -> is an abstract instance
2337          (3) has location and abs_ori -> is a concrete instance
2338          Name, fndn_ix and line number are all optional frills.
2339       */
2340       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
2341            /* 2 */ || (typeR != D3_INVALID_CUOFF)
2342            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
2343
2344          /* Add this variable to the list of interesting looking
2345             variables.  Crucially, note along with it the address
2346             range(s) associated with the variable, which for locals
2347             will be the address ranges at the top of the varparser's
2348             stack. */
2349          GExpr*   fbGX = NULL;
2350          Word     i, nRanges;
2351          const XArray*  /* of AddrRange */ xa;
2352          TempVar* tv;
2353          /* Stack can't be empty; we put a dummy entry on it for the
2354             entire address range before starting with the DIEs for
2355             this CU. */
2356          vg_assert(parser->sp >= 0);
2357
2358          /* If this is a local variable (non-global), try to find
2359             the GExpr for the DW_AT_frame_base of the containing
2360             function.  It should have been pushed on the stack at the
2361             time we encountered its DW_TAG_subprogram DIE, so the way
2362             to find it is to scan back down the stack looking for it.
2363             If there isn't an enclosing stack entry marked 'isFunc'
2364             then we must be seeing variable or formal param DIEs
2365             outside of a function, so we deem the Dwarf to be
2366             malformed if that happens.  Note that the fbGX may be NULL
2367             if the containing DT_TAG_subprogram didn't supply a
2368             DW_AT_frame_base -- that's OK, but there must actually be
2369             a containing DW_TAG_subprogram. */
2370          if (!global) {
2371             Bool found = False;
2372             for (i = parser->sp; i >= 0; i--) {
2373                if (parser->isFunc[i]) {
2374                   fbGX = parser->fbGX[i];
2375                   found = True;
2376                   break;
2377                }
2378             }
2379             if (!found) {
2380                if (0 && VG_(clo_verbosity) >= 0) {
2381                   VG_(message)(Vg_DebugMsg,
2382                      "warning: parse_var_DIE: non-global variable "
2383                      "outside DW_TAG_subprogram\n");
2384                }
2385                /* goto_bad_DIE; */
2386                /* This seems to happen a lot.  Just ignore it -- if,
2387                   when we come to evaluation of the location (guarded)
2388                   expression, it requires a frame base value, and
2389                   there's no expression for that, then evaluation as a
2390                   whole will fail.  Harmless - a bit of a waste of
2391                   cycles but nothing more. */
2392             }
2393          }
2394
2395          /* re "global ? 0 : parser->sp" (twice), if the var is
2396             marked 'global' then we must put it at the global scope,
2397             as only the global scope (level 0) covers the entire PC
2398             address space.  It is asserted elsewhere that level 0
2399             always covers the entire address space. */
2400          xa = parser->ranges[global ? 0 : parser->sp];
2401          nRanges = VG_(sizeXA)(xa);
2402          vg_assert(nRanges >= 0);
2403
2404          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
2405          tv->name   = name;
2406          tv->level  = global ? 0 : parser->sp;
2407          tv->typeR  = typeR;
2408          tv->gexpr  = gexpr;
2409          tv->fbGX   = fbGX;
2410          tv->fndn_ix= fndn_ix;
2411          tv->fLine  = lineNo;
2412          tv->dioff  = posn;
2413          tv->absOri = abs_ori;
2414
2415          /* See explanation on definition of type TempVar for the
2416             reason for this elaboration. */
2417          tv->nRanges = nRanges;
2418          tv->rngOneMin = 0;
2419          tv->rngOneMax = 0;
2420          tv->rngMany = NULL;
2421          if (nRanges == 1) {
2422             AddrRange* range = VG_(indexXA)(xa, 0);
2423             tv->rngOneMin = range->aMin;
2424             tv->rngOneMax = range->aMax;
2425          }
2426          else if (nRanges > 1) {
2427             /* See if we already have a range list which is
2428                structurally identical.  If so, use that; if not, clone
2429                this one, and add it to our collection. */
2430             UWord keyW, valW;
2431             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
2432                XArray* old = (XArray*)keyW;
2433                vg_assert(valW == 0);
2434                vg_assert(old != xa);
2435                tv->rngMany = old;
2436             } else {
2437                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
2438                tv->rngMany = cloned;
2439                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
2440             }
2441          }
2442
2443          VG_(addToXA)( tempvars, &tv );
2444
2445          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
2446                   VG_(sizeXA)(xa) );
2447          /* collect stats on how effective the ->ranges special
2448             casing is */
2449          if (0) {
2450             static Int ntot=0, ngt=0;
2451             ntot++;
2452             if (tv->rngMany) ngt++;
2453             if (0 == (ntot % 100000))
2454                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
2455          }
2456
2457       }
2458
2459       /* Here are some other weird cases seen in the wild:
2460
2461             We have a variable with a name and a type, but no
2462             location.  I guess that's a sign that it has been
2463             optimised away.  Ignore it.  Here's an example:
2464
2465             static Int lc_compar(void* n1, void* n2) {
2466                MC_Chunk* mc1 = *(MC_Chunk**)n1;
2467                MC_Chunk* mc2 = *(MC_Chunk**)n2;
2468                return (mc1->data < mc2->data ? -1 : 1);
2469             }
2470
2471             Both mc1 and mc2 are like this
2472             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
2473                 DW_AT_name        : mc1
2474                 DW_AT_decl_file   : 1
2475                 DW_AT_decl_line   : 216
2476                 DW_AT_type        : <5d3>
2477
2478             whereas n1 and n2 do have locations specified.
2479
2480             ---------------------------------------------
2481
2482             We see a DW_TAG_formal_parameter with a type, but
2483             no name and no location.  It's probably part of a function type
2484             construction, thusly, hence ignore it:
2485          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
2486              DW_AT_sibling     : <2c9>
2487              DW_AT_prototyped  : 1
2488              DW_AT_type        : <114>
2489          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2490              DW_AT_type        : <13e>
2491          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2492              DW_AT_type        : <133>
2493
2494             ---------------------------------------------
2495
2496             Is very minimal, like this:
2497             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
2498                 DW_AT_abstract_origin: <7ba>
2499             What that signifies I have no idea.  Ignore.
2500
2501             ----------------------------------------------
2502
2503             Is very minimal, like this:
2504             <200f>: DW_TAG_formal_parameter
2505                 DW_AT_abstract_ori: <1f4c>
2506                 DW_AT_location    : 13440
2507             What that signifies I have no idea.  Ignore.
2508             It might be significant, though: the variable at least
2509             has a location and so might exist somewhere.
2510             Maybe we should handle this.
2511
2512             ---------------------------------------------
2513
2514             <22407>: DW_TAG_variable
2515               DW_AT_name        : (indirect string, offset: 0x6579):
2516                                   vgPlain_trampoline_stuff_start
2517               DW_AT_decl_file   : 29
2518               DW_AT_decl_line   : 56
2519               DW_AT_external    : 1
2520               DW_AT_declaration : 1
2521
2522             Nameless and typeless variable that has a location?  Who
2523             knows.  Not me.
2524             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
2525                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
2526                                      (DW_OP_addr: 3813c7c0)
2527
2528             No, really.  Check it out.  gcc is quite simply borked.
2529             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
2530             // followed by no attributes, and the next DIE is a sibling,
2531             // not a child
2532             */
2533    }
2534    return;
2535
2536   bad_DIE:
2537    dump_bad_die_and_barf("parse_var_DIE", dtag, posn, level,
2538                          c_die, saved_die_c_offset,
2539                          abbv,
2540                          cc);
2541    /*NOTREACHED*/
2542 }
2543
2544 typedef
2545    struct {
2546       /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
2547          integer index to the index in di->fndnpool. */
2548       XArray* /* of UInt* */ fndn_ix_Table;
2549       UWord sibling; // sibling of the last read DIE (if it has a sibling).
2550    }
2551    D3InlParser;
2552
2553 /* Return the function name corresponding to absori.
2554
2555    absori is a 'cooked' reference to a DIE, i.e. absori can be either
2556    in cc->escn_debug_info or in cc->escn_debug_info_alt.
2557    get_inlFnName will uncook absori.
2558
2559    The returned value is a (permanent) string in DebugInfo's .strchunks.
2560
2561    LIMITATION: absori must point in the CU of cc. If absori points
2562    in another CU, returns "UnknownInlinedFun".
2563
2564    Here are the problems to retrieve the fun name if absori is in
2565    another CU:  the DIE reading code cannot properly extract data from
2566    another CU, as the abbv code retrieved in the other CU cannot be
2567    translated in an abbreviation. Reading data from the alternate debug
2568    info also gives problems as the string reference is also in the alternate
2569    file, but when reading the alt DIE, the string form is a 'local' string,
2570    but cannot be read in the current CU, but must be read in the alt CU.
2571    See bug 338803 comment#3 and attachment for a failed attempt to handle
2572    these problems (failed because with the patch, only one alt abbrev hash
2573    table is kept, while we must handle all abbreviations in all CUs
2574    referenced by an absori (being a reference to an alt CU, or a previous
2575    or following CU). */
2576 static const HChar* get_inlFnName (Int absori, const CUConst* cc, Bool td3)
2577 {
2578    Cursor c;
2579    const g_abbv *abbv;
2580    ULong  atag, abbv_code;
2581    UInt   has_children;
2582    UWord  posn;
2583    Bool type_flag, alt_flag;
2584    const HChar *ret = NULL;
2585    FormContents cts;
2586    UInt nf_i;
2587
2588    /* Some inlined subroutine call dwarf entries do not have the abstract
2589       origin attribute, resulting in absori being 0 (see callers of
2590       get_inlFnName). This is observed at least with gcc 6.3.0 when compiling
2591       valgrind with lto. So, in case we have a 0 absori, do not report an
2592       error, instead, rather return an unknown inlined function. */
2593    if (absori == 0) {
2594       static Bool absori0_reported = False;
2595       if (!absori0_reported && VG_(clo_verbosity) > 1) {
2596          VG_(message)(Vg_DebugMsg,
2597                       "Warning: inlined fn name without absori\n"
2598                       "is shown as UnknownInlinedFun\n");
2599          absori0_reported = True;
2600       }
2601       TRACE_D3(" <get_inlFnName>: absori is not set");
2602       return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
2603    }
2604
2605    posn = uncook_die( cc, absori, &type_flag, &alt_flag);
2606    if (type_flag)
2607       cc->barf("get_inlFnName: uncooked absori in type debug info");
2608
2609    /* LIMITATION: check we are in the same CU.
2610       If not, return unknown inlined function name. */
2611    /* if crossing between alt debug info<>normal info
2612           or posn not in the cu range,
2613       then it is in another CU. */
2614    if (alt_flag != cc->is_alt_info
2615        || posn < cc->cu_start_offset
2616        || posn >= cc->cu_start_offset + cc->unit_length) {
2617       static Bool reported = False;
2618       if (!reported && VG_(clo_verbosity) > 1) {
2619          VG_(message)(Vg_DebugMsg,
2620                       "Warning: cross-CU LIMITATION: some inlined fn names\n"
2621                       "might be shown as UnknownInlinedFun\n");
2622          reported = True;
2623       }
2624       TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn);
2625       return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
2626    }
2627
2628    init_Cursor (&c, cc->escn_debug_info, posn, cc->barf,
2629                 "Overrun get_inlFnName absori");
2630
2631    abbv_code = get_ULEB128( &c );
2632    abbv      = get_abbv ( cc, abbv_code);
2633    atag      = abbv->atag;
2634    TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
2635             posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
2636
2637    if (atag == 0)
2638       cc->barf("get_inlFnName: invalid zero tag on DIE");
2639
2640    has_children = abbv->has_children;
2641    if (has_children != DW_children_no && has_children != DW_children_yes)
2642       cc->barf("get_inlFnName: invalid has_children value");
2643
2644    if (atag != DW_TAG_subprogram)
2645       cc->barf("get_inlFnName: absori not a subprogram");
2646
2647    nf_i = 0;
2648    while (True) {
2649       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2650       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2651       nf_i++;
2652       if (attr == 0 && form == 0) break;
2653       get_Form_contents( &cts, cc, &c, False/*td3*/, form );
2654       if (attr == DW_AT_name) {
2655          HChar *fnname;
2656          if (cts.szB >= 0)
2657             cc->barf("get_inlFnName: expecting indirect string");
2658          fnname = ML_(cur_read_strdup)( cts.u.cur,
2659                                         "get_inlFnName.1" );
2660          ret = ML_(addStr)(cc->di, fnname, -1);
2661          ML_(dinfo_free) (fnname);
2662          break; /* Name found, get out of the loop, as this has priority over
2663                  DW_AT_specification. */
2664       }
2665       if (attr == DW_AT_specification) {
2666          UWord cdie;
2667
2668          if (cts.szB == 0)
2669             cc->barf("get_inlFnName: AT specification missing");
2670
2671          /* The recursive call to get_inlFnName will uncook its arg.
2672             So, we need to cook it here, so as to reference the
2673             correct section (e.g. the alt info). */
2674          cdie = cook_die_using_form(cc, (UWord)cts.u.val, form);
2675
2676          /* hoping that there is no loop */
2677          ret = get_inlFnName (cdie, cc, td3);
2678          /* Unclear if having both DW_AT_specification and DW_AT_name is
2679             possible but in any case, we do not break here.
2680             If we find later on a DW_AT_name, it will override the name found
2681             in the DW_AT_specification.*/
2682       }
2683    }
2684
2685    if (ret)
2686       return ret;
2687    else {
2688       TRACE_D3("AbsOriFnNameNotFound");
2689       return ML_(addStr)(cc->di, "AbsOriFnNameNotFound", -1);
2690    }
2691 }
2692
2693 /* Returns True if the (possibly) childrens of the current DIE are interesting
2694    to parse. Returns False otherwise.
2695    If the current DIE has a sibling, the non interesting children can
2696    maybe be skipped (if the DIE has a DW_AT_sibling).  */
2697 __attribute__((noinline))
2698 static Bool parse_inl_DIE (
2699    /*MOD*/D3InlParser* parser,
2700    DW_TAG dtag,
2701    UWord posn,
2702    Int level,
2703    Cursor* c_die,
2704    const g_abbv *abbv,
2705    CUConst* cc,
2706    Bool td3
2707 )
2708 {
2709    FormContents cts;
2710    UInt nf_i;
2711
2712    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2713
2714    /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit 'which
2715       in theory could also contain inlined fn calls).  */
2716    if (dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit) {
2717       Bool have_lo    = False;
2718       Addr ip_lo    = 0;
2719       const HChar *compdir = NULL;
2720
2721       nf_i = 0;
2722       while (True) {
2723          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2724          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2725          nf_i++;
2726          if (attr == 0 && form == 0) break;
2727          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2728          if (attr == DW_AT_low_pc && cts.szB > 0) {
2729             ip_lo   = cts.u.val;
2730             have_lo = True;
2731          }
2732          if (attr == DW_AT_comp_dir) {
2733             if (cts.szB >= 0)
2734                cc->barf("parse_inl_DIE compdir: expecting indirect string");
2735             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2736                                                "parse_inl_DIE.compdir" );
2737             compdir = ML_(addStr)(cc->di, str, -1);
2738             ML_(dinfo_free) (str);
2739          }
2740          if (attr == DW_AT_stmt_list && cts.szB > 0) {
2741             read_filename_table( parser->fndn_ix_Table, compdir,
2742                                  cc, cts.u.val, td3 );
2743          }
2744          if (attr == DW_AT_sibling && cts.szB > 0) {
2745             parser->sibling = cts.u.val;
2746          }
2747       }
2748       if (level == 0)
2749          setup_cu_svma (cc, have_lo, ip_lo, td3);
2750    }
2751
2752    if (dtag == DW_TAG_inlined_subroutine) {
2753       Bool   have_lo    = False;
2754       Bool   have_hi1   = False;
2755       Bool   have_range = False;
2756       Bool   hiIsRelative = False;
2757       Addr   ip_lo      = 0;
2758       Addr   ip_hi1     = 0;
2759       Addr   rangeoff   = 0;
2760       UInt   caller_fndn_ix = 0;
2761       Int caller_lineno = 0;
2762       Int inlinedfn_abstract_origin = 0;
2763       // 0 will be interpreted as no abstract origin by get_inlFnName
2764
2765       nf_i = 0;
2766       while (True) {
2767          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2768          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2769          nf_i++;
2770          if (attr == 0 && form == 0) break;
2771          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2772          if (attr == DW_AT_call_file && cts.szB > 0) {
2773             Int ftabIx = (Int)cts.u.val;
2774             if (ftabIx >= 1
2775                 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
2776                caller_fndn_ix = *(UInt*)
2777                           VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
2778             }
2779             if (0) VG_(printf)("XXX caller_fndn_ix = %u %s\n", caller_fndn_ix,
2780                                ML_(fndn_ix2filename) (cc->di, caller_fndn_ix));
2781          }
2782          if (attr == DW_AT_call_line && cts.szB > 0) {
2783             caller_lineno = cts.u.val;
2784          }
2785
2786          if (attr == DW_AT_abstract_origin  && cts.szB > 0) {
2787             inlinedfn_abstract_origin
2788                = cook_die_using_form (cc, (UWord)cts.u.val, form);
2789          }
2790
2791          if (attr == DW_AT_low_pc && cts.szB > 0) {
2792             ip_lo   = cts.u.val;
2793             have_lo = True;
2794          }
2795          if (attr == DW_AT_high_pc && cts.szB > 0) {
2796             ip_hi1   = cts.u.val;
2797             have_hi1 = True;
2798             if (form != DW_FORM_addr)
2799                hiIsRelative = True;
2800          }
2801          if (attr == DW_AT_ranges && cts.szB > 0) {
2802             rangeoff   = cts.u.val;
2803             have_range = True;
2804          }
2805          if (attr == DW_AT_sibling && cts.szB > 0) {
2806             parser->sibling = cts.u.val;
2807          }
2808       }
2809       if (have_lo && have_hi1 && hiIsRelative)
2810          ip_hi1 += ip_lo;
2811       /* Do we have something that looks sane? */
2812       if (dtag == DW_TAG_inlined_subroutine
2813           && (!have_lo) && (!have_hi1) && (!have_range)) {
2814          /* Seems strange. How can an inlined subroutine have
2815             no code ? */
2816          goto_bad_DIE;
2817       } else
2818       if (have_lo && have_hi1 && (!have_range)) {
2819          /* This inlined call is just a single address range. */
2820          if (ip_lo < ip_hi1) {
2821             /* Apply text debug biasing */
2822             ip_lo += cc->di->text_debug_bias;
2823             ip_hi1 += cc->di->text_debug_bias;
2824             ML_(addInlInfo) (cc->di,
2825                              ip_lo, ip_hi1,
2826                              get_inlFnName (inlinedfn_abstract_origin, cc, td3),
2827                              caller_fndn_ix,
2828                              caller_lineno, level);
2829          }
2830       } else if (have_range) {
2831          /* This inlined call is several address ranges. */
2832          XArray *ranges;
2833          Word j;
2834          const HChar *inlfnname =
2835             get_inlFnName (inlinedfn_abstract_origin, cc, td3);
2836
2837          /* Ranges are biased for the inline info using the same logic
2838             as what is used for biasing ranges for the var info, for which
2839             ranges are read using cc->cu_svma (see parse_var_DIE).
2840             Then text_debug_bias is added when a (non global) var
2841             is recorded (see just before the call to ML_(addVar)) */
2842          ranges = get_range_list( cc, td3,
2843                                   rangeoff, cc->cu_svma );
2844          for (j = 0; j < VG_(sizeXA)( ranges ); j++) {
2845             AddrRange* range = (AddrRange*) VG_(indexXA)( ranges, j );
2846             ML_(addInlInfo) (cc->di,
2847                              range->aMin   + cc->di->text_debug_bias,
2848                              range->aMax+1 + cc->di->text_debug_bias,
2849                              // aMax+1 as range has its last bound included
2850                              // while ML_(addInlInfo) expects last bound not
2851                              // included.
2852                              inlfnname,
2853                              caller_fndn_ix,
2854                              caller_lineno, level);
2855          }
2856          VG_(deleteXA)( ranges );
2857       } else
2858          goto_bad_DIE;
2859    }
2860
2861    // Only recursively parse the (possible) children for the DIE which
2862    // might maybe contain a DW_TAG_inlined_subroutine:
2863    return dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram
2864       || dtag == DW_TAG_inlined_subroutine
2865       || dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit;
2866
2867   bad_DIE:
2868    dump_bad_die_and_barf("parse_inl_DIE", dtag, posn, level,
2869                          c_die, saved_die_c_offset,
2870                          abbv,
2871                          cc);
2872    /*NOTREACHED*/
2873 }
2874
2875
2876 /*------------------------------------------------------------*/
2877 /*---                                                      ---*/
2878 /*--- Parsing of type-related DIEs                         ---*/
2879 /*---                                                      ---*/
2880 /*------------------------------------------------------------*/
2881
2882 typedef
2883    struct {
2884       /* What source language?  'A'=Ada83/95,
2885                                 'C'=C/C++,
2886                                 'F'=Fortran,
2887                                 '?'=other
2888          Established once per compilation unit. */
2889       UChar language;
2890       /* A stack of types which are currently under construction */
2891       Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
2892                    stack */
2893       Int   stack_size;
2894       /* Note that the TyEnts in qparentE are temporary copies of the
2895          ones accumulating in the main tyent array.  So it is not safe
2896          to free up anything on them when popping them off the stack
2897          (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
2898          memset them to zero when done. */
2899       TyEnt *qparentE; /* parent TyEnts */
2900       Int   *qlevel;
2901    }
2902    D3TypeParser;
2903
2904 /* Completely initialise a type parser object */
2905 static void
2906 type_parser_init ( D3TypeParser *parser )
2907 {
2908    parser->sp = -1;
2909    parser->language = '?';
2910    parser->stack_size = 0;
2911    parser->qparentE = NULL;
2912    parser->qlevel   = NULL;
2913 }
2914
2915 /* Release any memory hanging off a type parser object */
2916 static void
2917 type_parser_release ( D3TypeParser *parser )
2918 {
2919    ML_(dinfo_free)( parser->qparentE );
2920    ML_(dinfo_free)( parser->qlevel );
2921 }
2922
2923 static void typestack_show ( const D3TypeParser* parser, const HChar* str )
2924 {
2925    Word i;
2926    VG_(printf)("  typestack (%s) {\n", str);
2927    for (i = 0; i <= parser->sp; i++) {
2928       VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
2929       ML_(pp_TyEnt)( &parser->qparentE[i] );
2930       VG_(printf)("\n");
2931    }
2932    VG_(printf)("  }\n");
2933 }
2934
2935 /* Remove from the stack, all entries with .level > 'level' */
2936 static
2937 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
2938 {
2939    Bool changed = False;
2940    vg_assert(parser->sp < parser->stack_size);
2941    while (True) {
2942       vg_assert(parser->sp >= -1);
2943       if (parser->sp == -1) break;
2944       if (parser->qlevel[parser->sp] <= level) break;
2945       if (0)
2946          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
2947       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2948       parser->sp--;
2949       changed = True;
2950    }
2951    if (changed && td3)
2952       typestack_show( parser, "after preen" );
2953 }
2954
2955 static Bool typestack_is_empty ( const D3TypeParser* parser )
2956 {
2957    vg_assert(parser->sp >= -1 && parser->sp < parser->stack_size);
2958    return parser->sp == -1;
2959 }
2960
2961 static void typestack_push ( const CUConst* cc,
2962                              D3TypeParser* parser,
2963                              Bool td3,
2964                              const TyEnt* parentE, Int level )
2965 {
2966    if (0)
2967    TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
2968             parser->sp+1, level, parentE->cuOff);
2969
2970    /* First we need to zap everything >= 'level', as we are about to
2971       replace any previous entry at 'level', so .. */
2972    typestack_preen(parser, /*td3*/False, level-1);
2973
2974    vg_assert(parser->sp >= -1);
2975    vg_assert(parser->sp < parser->stack_size);
2976    if (parser->sp == parser->stack_size - 1) {
2977       parser->stack_size += 16;
2978       parser->qparentE =
2979          ML_(dinfo_realloc)("di.readdwarf3.typush.1", parser->qparentE,
2980                             parser->stack_size * sizeof parser->qparentE[0]);
2981       parser->qlevel =
2982          ML_(dinfo_realloc)("di.readdwarf3.typush.2", parser->qlevel,
2983                             parser->stack_size * sizeof parser->qlevel[0]);
2984    }
2985    if (parser->sp >= 0)
2986       vg_assert(parser->qlevel[parser->sp] < level);
2987    parser->sp++;
2988    vg_assert(parentE);
2989    vg_assert(ML_(TyEnt__is_type)(parentE));
2990    vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
2991    parser->qparentE[parser->sp] = *parentE;
2992    parser->qlevel[parser->sp]  = level;
2993    if (TD3)
2994       typestack_show( parser, "after push" );
2995 }
2996
2997 /* True if the subrange type being parsed gives the bounds of an array. */
2998 static Bool subrange_type_denotes_array_bounds ( const D3TypeParser* parser,
2999                                                  DW_TAG dtag ) {
3000    vg_assert(dtag == DW_TAG_subrange_type);
3001    /* For most languages, a subrange_type dtag always gives the
3002       bounds of an array.
3003       For Ada, there are additional conditions as a subrange_type
3004       is also used for other purposes. */
3005    if (parser->language != 'A')
3006       /* not Ada, so it definitely denotes an array bound. */
3007       return True;
3008    else
3009       /* Extra constraints for Ada: it only denotes an array bound if .. */
3010       return (! typestack_is_empty(parser)
3011               && parser->qparentE[parser->sp].tag == Te_TyArray);
3012 }
3013
3014 /* True if the form is one of the forms supported to give an array bound.
3015    For some arrays (scope local arrays with variable size),
3016    a DW_FORM_ref4 was used, and was wrongly used as the bound value.
3017    So, refuse the forms that are known to give a problem. */
3018 static Bool form_expected_for_bound ( DW_FORM form ) {
3019    if (form == DW_FORM_ref1
3020        || form == DW_FORM_ref2
3021        || form == DW_FORM_ref4
3022        || form == DW_FORM_ref8)
3023       return False;
3024
3025    return True;
3026 }
3027
3028 /* Parse a type-related DIE.  'parser' holds the current parser state.
3029    'admin' is where the completed types are dumped.  'dtag' is the tag
3030    for this DIE.  'c_die' points to the start of the data fields (FORM
3031    stuff) for the DIE.  abbv is the parsed abbreviation which describe
3032    the DIE.
3033
3034    We may find the DIE uninteresting, in which case we should ignore
3035    it.
3036
3037    What happens: the DIE is examined.  If uninteresting, it is ignored.
3038    Otherwise, the DIE gives rise to two things:
3039
3040    (1) the offset of this DIE in the CU -- the cuOffset, a UWord
3041    (2) a TyAdmin structure, which holds the type, or related stuff
3042
3043    (2) is added at the end of 'tyadmins', at some index, say 'i'.
3044
3045    A pair (cuOffset, i) is added to 'tydict'.
3046
3047    Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
3048    a mapping from cuOffset to the index of the corresponding entry in
3049    'tyadmin'.
3050
3051    When resolving a cuOffset to a TyAdmin, first look up the cuOffset
3052    in the tydict (by binary search).  This gives an index into
3053    tyadmins, and the required entity lives in tyadmins at that index.
3054 */
3055 __attribute__((noinline))
3056 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
3057                              /*MOD*/D3TypeParser* parser,
3058                              DW_TAG dtag,
3059                              UWord posn,
3060                              Int level,
3061                              Cursor* c_die,
3062                              const g_abbv *abbv,
3063                              const CUConst* cc,
3064                              Bool td3 )
3065 {
3066    FormContents cts;
3067    UInt nf_i;
3068    TyEnt typeE;
3069    TyEnt atomE;
3070    TyEnt fieldE;
3071    TyEnt boundE;
3072
3073    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
3074
3075    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
3076    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
3077    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
3078    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
3079
3080    /* If we've returned to a level at or above any previously noted
3081       parent, un-note it, so we don't believe we're still collecting
3082       its children. */
3083    typestack_preen( parser, td3, level-1 );
3084
3085    if (dtag == DW_TAG_compile_unit
3086        || dtag == DW_TAG_type_unit
3087        || dtag == DW_TAG_partial_unit) {
3088       /* See if we can find DW_AT_language, since it is important for
3089          establishing array bounds (see DW_TAG_subrange_type below in
3090          this fn) */
3091       nf_i = 0;
3092       while (True) {
3093          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3094          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3095          nf_i++;
3096          if (attr == 0 && form == 0) break;
3097          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3098          if (attr != DW_AT_language)
3099             continue;
3100          if (cts.szB <= 0)
3101            goto_bad_DIE;
3102          switch (cts.u.val) {
3103             case DW_LANG_C89: case DW_LANG_C:
3104             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
3105             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
3106             case DW_LANG_Upc: case DW_LANG_C99: case DW_LANG_C11:
3107             case DW_LANG_C_plus_plus_11: case DW_LANG_C_plus_plus_14:
3108                parser->language = 'C'; break;
3109             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
3110             case DW_LANG_Fortran95: case DW_LANG_Fortran03:
3111             case DW_LANG_Fortran08:
3112                parser->language = 'F'; break;
3113             case DW_LANG_Ada83: case DW_LANG_Ada95:
3114                parser->language = 'A'; break;
3115             case DW_LANG_Cobol74:
3116             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
3117             case DW_LANG_Modula2: case DW_LANG_Java:
3118             case DW_LANG_PLI:
3119             case DW_LANG_D: case DW_LANG_Python: case DW_LANG_Go:
3120             case DW_LANG_Mips_Assembler:
3121                parser->language = '?'; break;
3122             default:
3123                goto_bad_DIE;
3124          }
3125       }
3126    }
3127
3128    if (dtag == DW_TAG_base_type) {
3129       /* We can pick up a new base type any time. */
3130       VG_(memset)(&typeE, 0, sizeof(typeE));
3131       typeE.cuOff = D3_INVALID_CUOFF;
3132       typeE.tag   = Te_TyBase;
3133       nf_i = 0;
3134       while (True) {
3135          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3136          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3137          nf_i++;
3138          if (attr == 0 && form == 0) break;
3139          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3140          if (attr == DW_AT_name && cts.szB < 0) {
3141             typeE.Te.TyBase.name
3142                = ML_(cur_read_strdup)( cts.u.cur,
3143                                        "di.readdwarf3.ptD.base_type.1" );
3144          }
3145          if (attr == DW_AT_byte_size && cts.szB > 0) {
3146             typeE.Te.TyBase.szB = cts.u.val;
3147          }
3148          if (attr == DW_AT_encoding && cts.szB > 0) {
3149             switch (cts.u.val) {
3150                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
3151                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
3152                case DW_ATE_boolean:/* FIXME - is this correct? */
3153                case DW_ATE_unsigned_fixed:
3154                   typeE.Te.TyBase.enc = 'U'; break;
3155                case DW_ATE_signed: case DW_ATE_signed_char:
3156                case DW_ATE_signed_fixed:
3157                   typeE.Te.TyBase.enc = 'S'; break;
3158                case DW_ATE_float:
3159                   typeE.Te.TyBase.enc = 'F'; break;
3160                case DW_ATE_complex_float:
3161                   typeE.Te.TyBase.enc = 'C'; break;
3162                default:
3163                   goto_bad_DIE;
3164             }
3165          }
3166       }
3167
3168       /* Invent a name if it doesn't have one.  gcc-4.3
3169          -ftree-vectorize is observed to emit nameless base types. */
3170       if (!typeE.Te.TyBase.name)
3171          typeE.Te.TyBase.name
3172             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
3173                                  "<anon_base_type>" );
3174
3175       /* Do we have something that looks sane? */
3176       if (/* must have a name */
3177           typeE.Te.TyBase.name == NULL
3178           /* and a plausible size.  Yes, really 32: "complex long
3179              double" apparently has size=32 */
3180           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
3181           /* and a plausible encoding */
3182           || (typeE.Te.TyBase.enc != 'U'
3183               && typeE.Te.TyBase.enc != 'S'
3184               && typeE.Te.TyBase.enc != 'F'
3185               && typeE.Te.TyBase.enc != 'C'))
3186          goto_bad_DIE;
3187       /* Last minute hack: if we see this
3188          <1><515>: DW_TAG_base_type
3189              DW_AT_byte_size   : 0
3190              DW_AT_encoding    : 5
3191              DW_AT_name        : void
3192          convert it into a real Void type. */
3193       if (typeE.Te.TyBase.szB == 0
3194           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
3195          ML_(TyEnt__make_EMPTY)(&typeE);
3196          typeE.tag = Te_TyVoid;
3197          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
3198       }
3199
3200       goto acquire_Type;
3201    }
3202
3203    /*
3204     * An example of DW_TAG_rvalue_reference_type:
3205     *
3206     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3207     *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
3208     *     <1015>   DW_AT_byte_size   : 4
3209     *     <1016>   DW_AT_type        : <0xe52>
3210     */
3211    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
3212        || dtag == DW_TAG_ptr_to_member_type
3213        || dtag == DW_TAG_rvalue_reference_type) {
3214       /* This seems legit for _pointer_type and _reference_type.  I
3215          don't know if rolling _ptr_to_member_type in here really is
3216          legit, but it's better than not handling it at all. */
3217       VG_(memset)(&typeE, 0, sizeof(typeE));
3218       typeE.cuOff = D3_INVALID_CUOFF;
3219       switch (dtag) {
3220       case DW_TAG_pointer_type:
3221          typeE.tag = Te_TyPtr;
3222          break;
3223       case DW_TAG_reference_type:
3224          typeE.tag = Te_TyRef;
3225          break;
3226       case DW_TAG_ptr_to_member_type:
3227          typeE.tag = Te_TyPtrMbr;
3228          break;
3229       case DW_TAG_rvalue_reference_type:
3230          typeE.tag = Te_TyRvalRef;
3231          break;
3232       default:
3233          vg_assert(False);
3234       }
3235       /* target type defaults to void */
3236       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
3237       /* These four type kinds don't *have* to specify their size, in
3238          which case we assume it's a machine word.  But if they do
3239          specify it, it must be a machine word :-)  This probably
3240          assumes that the word size of the Dwarf3 we're reading is the
3241          same size as that on the machine.  gcc appears to give a size
3242          whereas icc9 doesn't. */
3243       typeE.Te.TyPorR.szB = sizeof(UWord);
3244       nf_i = 0;
3245       while (True) {
3246          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3247          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3248          nf_i++;
3249          if (attr == 0 && form == 0) break;
3250          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3251          if (attr == DW_AT_byte_size && cts.szB > 0) {
3252             typeE.Te.TyPorR.szB = cts.u.val;
3253          }
3254          if (attr == DW_AT_type && cts.szB > 0) {
3255             typeE.Te.TyPorR.typeR
3256                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3257          }
3258       }
3259       /* Do we have something that looks sane? */
3260       if (typeE.Te.TyPorR.szB != sizeof(UWord))
3261          goto_bad_DIE;
3262       else
3263          goto acquire_Type;
3264    }
3265
3266    if (dtag == DW_TAG_enumeration_type) {
3267       /* Create a new Type to hold the results. */
3268       VG_(memset)(&typeE, 0, sizeof(typeE));
3269       typeE.cuOff = posn;
3270       typeE.tag   = Te_TyEnum;
3271       Bool is_decl = False;
3272       typeE.Te.TyEnum.atomRs
3273          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
3274                        ML_(dinfo_free),
3275                        sizeof(UWord) );
3276       nf_i=0;
3277       while (True) {
3278          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3279          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3280          nf_i++;
3281          if (attr == 0 && form == 0) break;
3282          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3283          if (attr == DW_AT_name && cts.szB < 0) {
3284             typeE.Te.TyEnum.name
3285                = ML_(cur_read_strdup)( cts.u.cur,
3286                                        "di.readdwarf3.pTD.enum_type.2" );
3287          }
3288          if (attr == DW_AT_byte_size && cts.szB > 0) {
3289             typeE.Te.TyEnum.szB = cts.u.val;
3290          }
3291          if (attr == DW_AT_declaration) {
3292             is_decl = True;
3293          }
3294       }
3295
3296       if (!typeE.Te.TyEnum.name)
3297          typeE.Te.TyEnum.name
3298             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
3299                                  "<anon_enum_type>" );
3300
3301       /* Do we have something that looks sane? */
3302       if (typeE.Te.TyEnum.szB == 0
3303           /* we must know the size */
3304           /* but not for Ada, which uses such dummy
3305              enumerations as helper for gdb ada mode.
3306              Also GCC allows incomplete enums as GNU extension.
3307              http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
3308              These are marked as DW_AT_declaration and won't have
3309              a size. They can only be used in declaration or as
3310              pointer types.  You can't allocate variables or storage
3311              using such an enum type. (Also GCC seems to have a bug
3312              that will put such an enumeration_type into a .debug_types
3313              unit which should only contain complete types.) */
3314           && (parser->language != 'A' && !is_decl)) {
3315          goto_bad_DIE;
3316       }
3317
3318       /* On't stack! */
3319       typestack_push( cc, parser, td3, &typeE, level );
3320       goto acquire_Type;
3321    }
3322
3323    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
3324       DW_TAG_enumerator with only a DW_AT_name but no
3325       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
3326       and appears to be a new "feature" of gcc - versions 4.3.x and
3327       earlier do not appear to do this.  So accept DW_TAG_enumerator
3328       which only have a name but no value.  An example:
3329
3330       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
3331          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
3332                                      QtMsgType
3333          <185>   DW_AT_byte_size   : 4
3334          <186>   DW_AT_decl_file   : 14
3335          <187>   DW_AT_decl_line   : 1480
3336          <189>   DW_AT_sibling     : <0x1a7>
3337       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
3338          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
3339                                      QtDebugMsg
3340       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
3341          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
3342                                      QtWarningMsg
3343       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
3344          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
3345                                      QtCriticalMsg
3346       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
3347          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
3348                                      QtFatalMsg
3349       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
3350          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
3351                                      QtSystemMsg
3352    */
3353    if (dtag == DW_TAG_enumerator) {
3354       VG_(memset)( &atomE, 0, sizeof(atomE) );
3355       atomE.cuOff = posn;
3356       atomE.tag   = Te_Atom;
3357       nf_i = 0;
3358       while (True) {
3359          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3360          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3361          nf_i++;
3362          if (attr == 0 && form == 0) break;
3363          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3364          if (attr == DW_AT_name && cts.szB < 0) {
3365             atomE.Te.Atom.name
3366               = ML_(cur_read_strdup)( cts.u.cur,
3367                                       "di.readdwarf3.pTD.enumerator.1" );
3368          }
3369          if (attr == DW_AT_const_value && cts.szB > 0) {
3370             atomE.Te.Atom.value      = cts.u.val;
3371             atomE.Te.Atom.valueKnown = True;
3372          }
3373       }
3374       /* Do we have something that looks sane? */
3375       if (atomE.Te.Atom.name == NULL)
3376          goto_bad_DIE;
3377       /* Do we have a plausible parent? */
3378       if (typestack_is_empty(parser)) goto_bad_DIE;
3379       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3380       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3381       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3382       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
3383       /* Record this child in the parent */
3384       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
3385       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
3386                     &atomE );
3387       /* And record the child itself */
3388       goto acquire_Atom;
3389    }
3390
3391    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
3392       don't know if this is correct, but it at least makes this reader
3393       usable for gcc-4.3 produced Dwarf3. */
3394    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
3395        || dtag == DW_TAG_union_type) {
3396       Bool have_szB = False;
3397       Bool is_decl  = False;
3398       Bool is_spec  = False;
3399       /* Create a new Type to hold the results. */
3400       VG_(memset)(&typeE, 0, sizeof(typeE));
3401       typeE.cuOff = posn;
3402       typeE.tag   = Te_TyStOrUn;
3403       typeE.Te.TyStOrUn.name = NULL;
3404       typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
3405       typeE.Te.TyStOrUn.fieldRs
3406          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
3407                        ML_(dinfo_free),
3408                        sizeof(UWord) );
3409       typeE.Te.TyStOrUn.complete = True;
3410       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
3411                                    || dtag == DW_TAG_class_type;
3412       nf_i = 0;
3413       while (True) {
3414          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3415          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3416          nf_i++;
3417          if (attr == 0 && form == 0) break;
3418          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3419          if (attr == DW_AT_name && cts.szB < 0) {
3420             typeE.Te.TyStOrUn.name
3421                = ML_(cur_read_strdup)( cts.u.cur,
3422                                        "di.readdwarf3.ptD.struct_type.2" );
3423          }
3424          if (attr == DW_AT_byte_size && cts.szB >= 0) {
3425             typeE.Te.TyStOrUn.szB = cts.u.val;
3426             have_szB = True;
3427          }
3428          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
3429             is_decl = True;
3430          }
3431          if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
3432             is_spec = True;
3433          }
3434          if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
3435              && cts.szB > 0) {
3436             have_szB = True;
3437             typeE.Te.TyStOrUn.szB = 8;
3438             typeE.Te.TyStOrUn.typeR
3439                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3440          }
3441       }
3442       /* Do we have something that looks sane? */
3443       if (is_decl && (!is_spec)) {
3444          /* It's a DW_AT_declaration.  We require the name but
3445             nothing else. */
3446          /* JRS 2012-06-28: following discussion w/ tromey, if the
3447             type doesn't have name, just make one up, and accept it.
3448             It might be referred to by other DIEs, so ignoring it
3449             doesn't seem like a safe option. */
3450          if (typeE.Te.TyStOrUn.name == NULL)
3451             typeE.Te.TyStOrUn.name
3452                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
3453                                     "<anon_struct_type>" );
3454          typeE.Te.TyStOrUn.complete = False;
3455          /* JRS 2009 Aug 10: <possible kludge>? */
3456          /* Push this tyent on the stack, even though it's incomplete.
3457             It appears that gcc-4.4 on Fedora 11 will sometimes create
3458             DW_TAG_member entries for it, and so we need to have a
3459             plausible parent present in order for that to work.  See
3460             #200029 comments 8 and 9. */
3461          typestack_push( cc, parser, td3, &typeE, level );
3462          /* </possible kludge> */
3463          goto acquire_Type;
3464       }
3465       if ((!is_decl) /* && (!is_spec) */) {
3466          /* this is the common, ordinary case */
3467          /* The name can be present, or not */
3468          if (!have_szB) {
3469             /* We must know the size.
3470                But in Ada, record with discriminants might have no size.
3471                But in C, VLA in the middle of a struct (gcc extension)
3472                might have no size.
3473                Instead, some GNAT dwarf extensions and/or dwarf entries
3474                allow to calculate the struct size at runtime.
3475                We cannot do that (yet?) so, the temporary kludge is to use
3476                a small size. */
3477             typeE.Te.TyStOrUn.szB = 1;
3478          }
3479          /* On't stack! */
3480          typestack_push( cc, parser, td3, &typeE, level );
3481          goto acquire_Type;
3482       }
3483       else {
3484          /* don't know how to handle any other variants just now */
3485          goto_bad_DIE;
3486       }
3487    }
3488
3489    if (dtag == DW_TAG_member) {
3490       /* Acquire member entries for both DW_TAG_structure_type and
3491          DW_TAG_union_type.  They differ minorly, in that struct
3492          members must have a DW_AT_data_member_location expression
3493          whereas union members must not. */
3494       Bool parent_is_struct;
3495       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
3496       fieldE.cuOff = posn;
3497       fieldE.tag   = Te_Field;
3498       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
3499       nf_i = 0;
3500       while (True) {
3501          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3502          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3503          nf_i++;
3504          if (attr == 0 && form == 0) break;
3505          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3506          if (attr == DW_AT_name && cts.szB < 0) {
3507             fieldE.Te.Field.name
3508                = ML_(cur_read_strdup)( cts.u.cur,
3509                                        "di.readdwarf3.ptD.member.1" );
3510          }
3511          if (attr == DW_AT_type && cts.szB > 0) {
3512             fieldE.Te.Field.typeR
3513                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3514          }
3515          /* There are 2 different cases for DW_AT_data_member_location.
3516             If it is a constant class attribute, it contains byte offset
3517             from the beginning of the containing entity.
3518             Otherwise it is a location expression.  */
3519          if (attr == DW_AT_data_member_location && cts.szB > 0) {
3520             fieldE.Te.Field.nLoc = -1;
3521             fieldE.Te.Field.pos.offset = cts.u.val;
3522          }
3523          if (attr == DW_AT_data_member_location && cts.szB <= 0) {
3524             fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
3525             fieldE.Te.Field.pos.loc
3526                = ML_(cur_read_memdup)( cts.u.cur,
3527                                        (SizeT)fieldE.Te.Field.nLoc,
3528                                        "di.readdwarf3.ptD.member.2" );
3529          }
3530       }
3531       /* Do we have a plausible parent? */
3532       if (typestack_is_empty(parser)) goto_bad_DIE;
3533       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3534       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3535       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3536       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
3537       /* Do we have something that looks sane?  If this a member of a
3538          struct, we must have a location expression; but if a member
3539          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
3540          to reject in the latter case, but some compilers have been
3541          observed to emit constant-zero expressions.  So just ignore
3542          them. */
3543       parent_is_struct
3544          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
3545       if (!fieldE.Te.Field.name)
3546          fieldE.Te.Field.name
3547             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
3548                                  "<anon_field>" );
3549       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
3550          goto_bad_DIE;
3551       if (fieldE.Te.Field.nLoc) {
3552          if (!parent_is_struct) {
3553             /* If this is a union type, pretend we haven't seen the data
3554                member location expression, as it is by definition
3555                redundant (it must be zero). */
3556             if (fieldE.Te.Field.nLoc > 0)
3557                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
3558             fieldE.Te.Field.pos.loc = NULL;
3559             fieldE.Te.Field.nLoc = 0;
3560          }
3561          /* Record this child in the parent */
3562          fieldE.Te.Field.isStruct = parent_is_struct;
3563          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
3564          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
3565                        &posn );
3566          /* And record the child itself */
3567          goto acquire_Field;
3568       } else {
3569          /* Member with no location - this can happen with static
3570             const members in C++ code which are compile time constants
3571             that do no exist in the class. They're not of any interest
3572             to us so we ignore them. */
3573          ML_(TyEnt__make_EMPTY)(&fieldE);
3574       }
3575    }
3576
3577    if (dtag == DW_TAG_array_type) {
3578       VG_(memset)(&typeE, 0, sizeof(typeE));
3579       typeE.cuOff = posn;
3580       typeE.tag   = Te_TyArray;
3581       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
3582       typeE.Te.TyArray.boundRs
3583          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
3584                        ML_(dinfo_free),
3585                        sizeof(UWord) );
3586       nf_i = 0;
3587       while (True) {
3588          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3589          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3590          nf_i++;
3591          if (attr == 0 && form == 0) break;
3592          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3593          if (attr == DW_AT_type && cts.szB > 0) {
3594             typeE.Te.TyArray.typeR
3595                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3596          }
3597       }
3598       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
3599          goto_bad_DIE;
3600       /* On't stack! */
3601       typestack_push( cc, parser, td3, &typeE, level );
3602       goto acquire_Type;
3603    }
3604
3605    /* this is a subrange type defining the bounds of an array. */
3606    if (dtag == DW_TAG_subrange_type
3607        && subrange_type_denotes_array_bounds(parser, dtag)) {
3608       Bool have_lower = False;
3609       Bool have_upper = False;
3610       Bool have_count = False;
3611       Long lower = 0;
3612       Long upper = 0;
3613
3614       switch (parser->language) {
3615          case 'C': have_lower = True;  lower = 0; break;
3616          case 'F': have_lower = True;  lower = 1; break;
3617          case '?': have_lower = False; break;
3618          case 'A': have_lower = False; break;
3619          default:  vg_assert(0); /* assured us by handling of
3620                                     DW_TAG_compile_unit in this fn */
3621       }
3622
3623       VG_(memset)( &boundE, 0, sizeof(boundE) );
3624       boundE.cuOff = D3_INVALID_CUOFF;
3625       boundE.tag   = Te_Bound;
3626       nf_i = 0;
3627       while (True) {
3628          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3629          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3630          nf_i++;
3631          if (attr == 0 && form == 0) break;
3632          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3633          if (attr == DW_AT_lower_bound && cts.szB > 0
3634              && form_expected_for_bound (form)) {
3635             lower      = (Long)cts.u.val;
3636             have_lower = True;
3637          }
3638          if (attr == DW_AT_upper_bound && cts.szB > 0
3639              && form_expected_for_bound (form)) {
3640             upper      = (Long)cts.u.val;
3641             have_upper = True;
3642          }
3643          if (attr == DW_AT_count && cts.szB > 0) {
3644             /*count    = (Long)cts.u.val;*/
3645             have_count = True;
3646          }
3647       }
3648       /* FIXME: potentially skip the rest if no parent present, since
3649          it could be the case that this subrange type is free-standing
3650          (not being used to describe the bounds of a containing array
3651          type) */
3652       /* Do we have a plausible parent? */
3653       if (typestack_is_empty(parser)) goto_bad_DIE;
3654       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3655       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3656       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3657       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
3658
3659       /* Figure out if we have a definite range or not */
3660       if (have_lower && have_upper && (!have_count)) {
3661          boundE.Te.Bound.knownL = True;
3662          boundE.Te.Bound.knownU = True;
3663          boundE.Te.Bound.boundL = lower;
3664          boundE.Te.Bound.boundU = upper;
3665       }
3666       else if (have_lower && (!have_upper) && (!have_count)) {
3667          boundE.Te.Bound.knownL = True;
3668          boundE.Te.Bound.knownU = False;
3669          boundE.Te.Bound.boundL = lower;
3670          boundE.Te.Bound.boundU = 0;
3671       }
3672       else if ((!have_lower) && have_upper && (!have_count)) {
3673          boundE.Te.Bound.knownL = False;
3674          boundE.Te.Bound.knownU = True;
3675          boundE.Te.Bound.boundL = 0;
3676          boundE.Te.Bound.boundU = upper;
3677       }
3678       else if ((!have_lower) && (!have_upper) && (!have_count)) {
3679          boundE.Te.Bound.knownL = False;
3680          boundE.Te.Bound.knownU = False;
3681          boundE.Te.Bound.boundL = 0;
3682          boundE.Te.Bound.boundU = 0;
3683       } else {
3684          /* FIXME: handle more cases */
3685          goto_bad_DIE;
3686       }
3687
3688       /* Record this bound in the parent */
3689       boundE.cuOff = posn;
3690       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
3691       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
3692                     &boundE.cuOff );
3693       /* And record the child itself */
3694       goto acquire_Bound;
3695    }
3696
3697    /* typedef or subrange_type other than array bounds. */
3698    if (dtag == DW_TAG_typedef
3699        || (dtag == DW_TAG_subrange_type
3700            && !subrange_type_denotes_array_bounds(parser, dtag))) {
3701       /* subrange_type other than array bound is only for Ada. */
3702       vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
3703       /* We can pick up a new typedef/subrange_type any time. */
3704       VG_(memset)(&typeE, 0, sizeof(typeE));
3705       typeE.cuOff = D3_INVALID_CUOFF;
3706       typeE.tag   = Te_TyTyDef;
3707       typeE.Te.TyTyDef.name = NULL;
3708       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
3709       nf_i = 0;
3710       while (True) {
3711          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3712          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3713          nf_i++;
3714          if (attr == 0 && form == 0) break;
3715          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3716          if (attr == DW_AT_name && cts.szB < 0) {
3717             typeE.Te.TyTyDef.name
3718                = ML_(cur_read_strdup)( cts.u.cur,
3719                                        "di.readdwarf3.ptD.typedef.1" );
3720          }
3721          if (attr == DW_AT_type && cts.szB > 0) {
3722             typeE.Te.TyTyDef.typeR
3723                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3724          }
3725       }
3726       /* Do we have something that looks sane?
3727          gcc gnat Ada generates minimal typedef
3728          such as the below
3729          <6><91cc>: DW_TAG_typedef
3730             DW_AT_abstract_ori: <9066>
3731          g++ for OMP can generate artificial functions that have
3732          parameters that refer to pointers to unnamed typedefs.
3733          See https://bugs.kde.org/show_bug.cgi?id=273475
3734          So we cannot require a name for a DW_TAG_typedef.
3735       */
3736       goto acquire_Type;
3737    }
3738
3739    if (dtag == DW_TAG_subroutine_type) {
3740       /* function type? just record that one fact and ask no
3741          further questions. */
3742       VG_(memset)(&typeE, 0, sizeof(typeE));
3743       typeE.cuOff = D3_INVALID_CUOFF;
3744       typeE.tag   = Te_TyFn;
3745       goto acquire_Type;
3746    }
3747
3748    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type
3749        || dtag == DW_TAG_restrict_type) {
3750       Int have_ty = 0;
3751       VG_(memset)(&typeE, 0, sizeof(typeE));
3752       typeE.cuOff = D3_INVALID_CUOFF;
3753       typeE.tag   = Te_TyQual;
3754       typeE.Te.TyQual.qual
3755          = (dtag == DW_TAG_volatile_type ? 'V'
3756             : (dtag == DW_TAG_const_type ? 'C' : 'R'));
3757       /* target type defaults to 'void' */
3758       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3759       nf_i = 0;
3760       while (True) {
3761          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3762          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3763          nf_i++;
3764          if (attr == 0 && form == 0) break;
3765          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3766          if (attr == DW_AT_type && cts.szB > 0) {
3767             typeE.Te.TyQual.typeR
3768                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3769             have_ty++;
3770          }
3771       }
3772       /* gcc sometimes generates DW_TAG_const/volatile_type without
3773          DW_AT_type and GDB appears to interpret the type as 'const
3774          void' (resp. 'volatile void').  So just allow it .. */
3775       if (have_ty == 1 || have_ty == 0)
3776          goto acquire_Type;
3777       else
3778          goto_bad_DIE;
3779    }
3780
3781    /*
3782     * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
3783     *
3784     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3785     *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
3786     *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
3787     */
3788    if (dtag == DW_TAG_unspecified_type) {
3789       VG_(memset)(&typeE, 0, sizeof(typeE));
3790       typeE.cuOff           = D3_INVALID_CUOFF;
3791       typeE.tag             = Te_TyQual;
3792       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3793       goto acquire_Type;
3794    }
3795
3796    /* else ignore this DIE */
3797    return;
3798    /*NOTREACHED*/
3799
3800   acquire_Type:
3801    if (0) VG_(printf)("YYYY Acquire Type\n");
3802    vg_assert(ML_(TyEnt__is_type)( &typeE ));
3803    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
3804    typeE.cuOff = posn;
3805    VG_(addToXA)( tyents, &typeE );
3806    return;
3807    /*NOTREACHED*/
3808
3809   acquire_Atom:
3810    if (0) VG_(printf)("YYYY Acquire Atom\n");
3811    vg_assert(atomE.tag == Te_Atom);
3812    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
3813    atomE.cuOff = posn;
3814    VG_(addToXA)( tyents, &atomE );
3815    return;
3816    /*NOTREACHED*/
3817
3818   acquire_Field:
3819    /* For union members, Expr should be absent */
3820    if (0) VG_(printf)("YYYY Acquire Field\n");
3821    vg_assert(fieldE.tag == Te_Field);
3822    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
3823    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
3824    if (fieldE.Te.Field.isStruct) {
3825       vg_assert(fieldE.Te.Field.nLoc != 0);
3826    } else {
3827       vg_assert(fieldE.Te.Field.nLoc == 0);
3828    }
3829    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
3830    fieldE.cuOff = posn;
3831    VG_(addToXA)( tyents, &fieldE );
3832    return;
3833    /*NOTREACHED*/
3834
3835   acquire_Bound:
3836    if (0) VG_(printf)("YYYY Acquire Bound\n");
3837    vg_assert(boundE.tag == Te_Bound);
3838    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
3839    boundE.cuOff = posn;
3840    VG_(addToXA)( tyents, &boundE );
3841    return;
3842    /*NOTREACHED*/
3843
3844   bad_DIE:
3845    dump_bad_die_and_barf("parse_type_DIE", dtag, posn, level,
3846                          c_die, saved_die_c_offset,
3847                          abbv,
3848                          cc);
3849    /*NOTREACHED*/
3850 }
3851
3852
3853 /*------------------------------------------------------------*/
3854 /*---                                                      ---*/
3855 /*--- Compression of type DIE information                  ---*/
3856 /*---                                                      ---*/
3857 /*------------------------------------------------------------*/
3858
3859 static UWord chase_cuOff ( Bool* changed,
3860                            const XArray* /* of TyEnt */ ents,
3861                            TyEntIndexCache* ents_cache,
3862                            UWord cuOff )
3863 {
3864    TyEnt* ent;
3865    ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
3866
3867    if (!ent) {
3868       VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
3869       *changed = False;
3870       return cuOff;
3871    }
3872
3873    vg_assert(ent->tag != Te_EMPTY);
3874    if (ent->tag != Te_INDIR) {
3875       *changed = False;
3876       return cuOff;
3877    } else {
3878       vg_assert(ent->Te.INDIR.indR < cuOff);
3879       *changed = True;
3880       return ent->Te.INDIR.indR;
3881    }
3882 }
3883
3884 static
3885 void chase_cuOffs_in_XArray ( Bool* changed,
3886                               const XArray* /* of TyEnt */ ents,
3887                               TyEntIndexCache* ents_cache,
3888                               /*MOD*/XArray* /* of UWord */ cuOffs )
3889 {
3890    Bool b2 = False;
3891    Word i, n = VG_(sizeXA)( cuOffs );
3892    for (i = 0; i < n; i++) {
3893       Bool   b = False;
3894       UWord* p = VG_(indexXA)( cuOffs, i );
3895       *p = chase_cuOff( &b, ents, ents_cache, *p );
3896       if (b)
3897          b2 = True;
3898    }
3899    *changed = b2;
3900 }
3901
3902 static Bool TyEnt__subst_R_fields ( const XArray* /* of TyEnt */ ents,
3903                                     TyEntIndexCache* ents_cache,
3904                                     /*MOD*/TyEnt* te )
3905 {
3906    Bool b, changed = False;
3907    switch (te->tag) {
3908       case Te_EMPTY:
3909          break;
3910       case Te_INDIR:
3911          te->Te.INDIR.indR
3912             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
3913          if (b) changed = True;
3914          break;
3915       case Te_UNKNOWN:
3916          break;
3917       case Te_Atom:
3918          break;
3919       case Te_Field:
3920          te->Te.Field.typeR
3921             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
3922          if (b) changed = True;
3923          break;
3924       case Te_Bound:
3925          break;
3926       case Te_TyBase:
3927          break;
3928       case Te_TyPtr:
3929       case Te_TyRef:
3930       case Te_TyPtrMbr:
3931       case Te_TyRvalRef:
3932          te->Te.TyPorR.typeR
3933             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
3934          if (b) changed = True;
3935          break;
3936       case Te_TyTyDef:
3937          te->Te.TyTyDef.typeR
3938             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
3939          if (b) changed = True;
3940          break;
3941       case Te_TyStOrUn:
3942          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
3943          if (b) changed = True;
3944          break;
3945       case Te_TyEnum:
3946          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
3947          if (b) changed = True;
3948          break;
3949       case Te_TyArray:
3950          te->Te.TyArray.typeR
3951             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
3952          if (b) changed = True;
3953          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
3954          if (b) changed = True;
3955          break;
3956       case Te_TyFn:
3957          break;
3958       case Te_TyQual:
3959          te->Te.TyQual.typeR
3960             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
3961          if (b) changed = True;
3962          break;
3963       case Te_TyVoid:
3964          break;
3965       default:
3966          ML_(pp_TyEnt)(te);
3967          vg_assert(0);
3968    }
3969    return changed;
3970 }
3971
3972 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
3973    'R' or 'Rs' fields (those which refer to other tyents), and replace
3974    any which point to INDIR nodes with the target of the indirection
3975    (which should not itself be an indirection).  In summary, this
3976    routine shorts out all references to indirection nodes. */
3977 static
3978 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
3979                                      TyEntIndexCache* ents_cache )
3980 {
3981    Word i, n, nChanged = 0;
3982    Bool b;
3983    n = VG_(sizeXA)( ents );
3984    for (i = 0; i < n; i++) {
3985       TyEnt* ent = VG_(indexXA)( ents, i );
3986       vg_assert(ent->tag != Te_EMPTY);
3987       /* We have to substitute everything, even indirections, so as to
3988          ensure that chains of indirections don't build up. */
3989       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
3990       if (b)
3991          nChanged++;
3992    }
3993
3994    return nChanged;
3995 }
3996
3997
3998 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
3999    Look up each new tyent in the dictionary in turn.  If it is already
4000    in the dictionary, replace this tyent with an indirection to the
4001    existing one, and delete any malloc'd stuff hanging off this one.
4002    In summary, this routine commons up all tyents that are identical
4003    as defined by TyEnt__cmp_by_all_except_cuOff. */
4004 static
4005 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
4006 {
4007    Word    n, i, nDeleted;
4008    WordFM* dict; /* TyEnt* -> void */
4009    TyEnt*  ent;
4010    UWord   keyW, valW;
4011
4012    dict = VG_(newFM)(
4013              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
4014              ML_(dinfo_free),
4015              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
4016           );
4017
4018    nDeleted = 0;
4019    n = VG_(sizeXA)( ents );
4020    for (i = 0; i < n; i++) {
4021       ent = VG_(indexXA)( ents, i );
4022       vg_assert(ent->tag != Te_EMPTY);
4023
4024       /* Ignore indirections, although check that they are
4025          not forming a cycle. */
4026       if (ent->tag == Te_INDIR) {
4027          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
4028          continue;
4029       }
4030
4031       keyW = valW = 0;
4032       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
4033          /* it's already in the dictionary. */
4034          TyEnt* old = (TyEnt*)keyW;
4035          vg_assert(valW == 0);
4036          vg_assert(old != ent);
4037          vg_assert(old->tag != Te_INDIR);
4038          /* since we are traversing the array in increasing order of
4039             cuOff: */
4040          vg_assert(old->cuOff < ent->cuOff);
4041          /* So anyway, dump this entry and replace it with an
4042             indirection to the one in the dictionary.  Note that the
4043             assertion above guarantees that we cannot create cycles of
4044             indirections, since we are always creating an indirection
4045             to a tyent with a cuOff lower than this one. */
4046          ML_(TyEnt__make_EMPTY)( ent );
4047          ent->tag = Te_INDIR;
4048          ent->Te.INDIR.indR = old->cuOff;
4049          nDeleted++;
4050       } else {
4051          /* not in dictionary; add it and keep going. */
4052          VG_(addToFM)( dict, (UWord)ent, 0 );
4053       }
4054    }
4055
4056    VG_(deleteFM)( dict, NULL, NULL );
4057
4058    return nDeleted;
4059 }
4060
4061
4062 static
4063 void dedup_types ( Bool td3,
4064                    /*MOD*/XArray* /* of TyEnt */ ents,
4065                    TyEntIndexCache* ents_cache )
4066 {
4067    Word m, n, i, nDel, nSubst, nThresh;
4068    if (0) td3 = True;
4069
4070    n = VG_(sizeXA)( ents );
4071
4072    /* If a commoning pass and a substitution pass both make fewer than
4073       this many changes, just stop.  It's pointless to burn up CPU
4074       time trying to compress the last 1% or so out of the array. */
4075    nThresh = n / 200;
4076
4077    /* First we must sort .ents by its .cuOff fields, so we
4078       can index into it. */
4079    VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4080    VG_(sortXA)( ents );
4081
4082    /* Now repeatedly do commoning and substitution passes over
4083       the array, until there are no more changes. */
4084    do {
4085       nDel   = dedup_types_commoning_pass ( ents );
4086       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
4087       vg_assert(nDel >= 0 && nSubst >= 0);
4088       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
4089    } while (nDel > nThresh || nSubst > nThresh);
4090
4091    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
4092       In fact this should be true at the end of every loop iteration
4093       above (a commoning pass followed by a substitution pass), but
4094       checking it on every iteration is excessively expensive.  Note,
4095       this loop also computes 'm' for the stats printing below it. */
4096    m = 0;
4097    n = VG_(sizeXA)( ents );
4098    for (i = 0; i < n; i++) {
4099       TyEnt *ent, *ind;
4100       ent = VG_(indexXA)( ents, i );
4101       if (ent->tag != Te_INDIR) continue;
4102       m++;
4103       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4104                                          ent->Te.INDIR.indR );
4105       vg_assert(ind);
4106       vg_assert(ind->tag != Te_INDIR);
4107    }
4108
4109    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
4110 }
4111
4112
4113 /*------------------------------------------------------------*/
4114 /*---                                                      ---*/
4115 /*--- Resolution of references to type DIEs                ---*/
4116 /*---                                                      ---*/
4117 /*------------------------------------------------------------*/
4118
4119 /* Make a pass through the (temporary) variables array.  Examine the
4120    type of each variable, check is it found, and chase any Te_INDIRs.
4121    Postcondition is: each variable has a typeR field that refers to a
4122    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
4123    not to refer to a Te_INDIR.  (This is so that we can throw all the
4124    Te_INDIRs away later). */
4125
4126 __attribute__((noinline))
4127 static void resolve_variable_types (
4128                void (*barf)( const HChar* ) __attribute__((noreturn)),
4129                /*R-O*/XArray* /* of TyEnt */ ents,
4130                /*MOD*/TyEntIndexCache* ents_cache,
4131                /*MOD*/XArray* /* of TempVar* */ vars
4132             )
4133 {
4134    Word i, n;
4135    n = VG_(sizeXA)( vars );
4136    for (i = 0; i < n; i++) {
4137       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
4138       /* This is the stated type of the variable.  But it might be
4139          an indirection, so be careful. */
4140       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4141                                                 var->typeR );
4142       if (ent && ent->tag == Te_INDIR) {
4143          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4144                                             ent->Te.INDIR.indR );
4145          vg_assert(ent);
4146          vg_assert(ent->tag != Te_INDIR);
4147       }
4148
4149       /* Deal first with "normal" cases */
4150       if (ent && ML_(TyEnt__is_type)(ent)) {
4151          var->typeR = ent->cuOff;
4152          continue;
4153       }
4154
4155       /* If there's no ent, it probably we did not manage to read a
4156          type at the cuOffset which is stated as being this variable's
4157          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
4158       if (ent == NULL) {
4159          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
4160          barf("resolve_variable_types: "
4161               "cuOff does not refer to a known type");
4162       }
4163       vg_assert(ent);
4164       /* If ent has any other tag, something bad happened, along the
4165          lines of var->typeR not referring to a type at all. */
4166       vg_assert(ent->tag == Te_UNKNOWN);
4167       /* Just accept it; the type will be useless, but at least keep
4168          going. */
4169       var->typeR = ent->cuOff;
4170    }
4171 }
4172
4173
4174 /*------------------------------------------------------------*/
4175 /*---                                                      ---*/
4176 /*--- Parsing of Compilation Units                         ---*/
4177 /*---                                                      ---*/
4178 /*------------------------------------------------------------*/
4179
4180 static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
4181    const TempVar* t1 = *(const TempVar *const *)v1;
4182    const TempVar* t2 = *(const TempVar *const *)v2;
4183    if (t1->dioff < t2->dioff) return -1;
4184    if (t1->dioff > t2->dioff) return 1;
4185    return 0;
4186 }
4187
4188 static void read_DIE (
4189    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
4190    /*MOD*/XArray* /* of TyEnt */ tyents,
4191    /*MOD*/XArray* /* of TempVar* */ tempvars,
4192    /*MOD*/XArray* /* of GExpr* */ gexprs,
4193    /*MOD*/D3TypeParser* typarser,
4194    /*MOD*/D3VarParser* varparser,
4195    /*MOD*/D3InlParser* inlparser,
4196    Cursor* c, Bool td3, CUConst* cc, Int level
4197 )
4198 {
4199    const g_abbv *abbv;
4200    ULong  atag, abbv_code;
4201    UWord  posn;
4202    UInt   has_children;
4203    UWord  start_die_c_offset;
4204    UWord  after_die_c_offset;
4205    // If the DIE we will parse has a sibling and the parser(s) are
4206    // all indicating that parse_children is not necessary, then
4207    // we will skip the children by jumping to the sibling of this DIE
4208    // (if it has a sibling).
4209    UWord  sibling = 0;
4210    Bool   parse_children = False;
4211
4212    /* --- Deal with this DIE --- */
4213    posn      = cook_die( cc, get_position_of_Cursor( c ) );
4214    abbv_code = get_ULEB128( c );
4215    abbv = get_abbv(cc, abbv_code);
4216    atag      = abbv->atag;
4217
4218    if (TD3) {
4219       TRACE_D3("\n");
4220       trace_DIE ((DW_TAG)atag, posn, level,
4221                  get_position_of_Cursor( c ), abbv, cc);
4222    }
4223
4224    if (atag == 0)
4225       cc->barf("read_DIE: invalid zero tag on DIE");
4226
4227    has_children = abbv->has_children;
4228    if (has_children != DW_children_no && has_children != DW_children_yes)
4229       cc->barf("read_DIE: invalid has_children value");
4230
4231    /* We're set up to look at the fields of this DIE.  Hand it off to
4232       any parser(s) that want to see it.  Since they will in general
4233       advance the DIE cursor, remember the current settings so that we
4234       can then back up. . */
4235    start_die_c_offset  = get_position_of_Cursor( c );
4236    after_die_c_offset  = 0; // set to c position if a parser has read the DIE.
4237
4238    if (VG_(clo_read_var_info)) {
4239       parse_type_DIE( tyents,
4240                       typarser,
4241                       (DW_TAG)atag,
4242                       posn,
4243                       level,
4244                       c,     /* DIE cursor */
4245                       abbv,  /* abbrev */
4246                       cc,
4247                       td3 );
4248       if (get_position_of_Cursor( c ) != start_die_c_offset) {
4249          after_die_c_offset = get_position_of_Cursor( c );
4250          set_position_of_Cursor( c, start_die_c_offset );
4251       }
4252
4253       parse_var_DIE( rangestree,
4254                      tempvars,
4255                      gexprs,
4256                      varparser,
4257                      (DW_TAG)atag,
4258                      posn,
4259                      level,
4260                      c,     /* DIE cursor */
4261                      abbv,  /* abbrev */
4262                      cc,
4263                      td3 );
4264       if (get_position_of_Cursor( c ) != start_die_c_offset) {
4265          after_die_c_offset = get_position_of_Cursor( c );
4266          set_position_of_Cursor( c, start_die_c_offset );
4267       }
4268
4269       parse_children = True;
4270       // type and var parsers do not have logic to skip childrens and establish
4271       // the value of sibling.
4272    }
4273
4274    if (VG_(clo_read_inline_info)) {
4275       inlparser->sibling = 0;
4276       parse_children =
4277          parse_inl_DIE( inlparser,
4278                         (DW_TAG)atag,
4279                         posn,
4280                         level,
4281                         c,     /* DIE cursor */
4282                         abbv, /* abbrev */
4283                         cc,
4284                         td3 )
4285          || parse_children;
4286       if (get_position_of_Cursor( c ) != start_die_c_offset) {
4287          after_die_c_offset = get_position_of_Cursor( c );
4288          // Last parser, no need to reset the cursor to start_die_c_offset.
4289       }
4290       if (sibling == 0)
4291          sibling = inlparser->sibling;
4292       vg_assert (inlparser->sibling == 0 || inlparser->sibling == sibling);
4293    }
4294
4295    if (after_die_c_offset > 0) {
4296       // DIE was read by a parser above, so we know where the DIE ends.
4297       set_position_of_Cursor( c, after_die_c_offset );
4298    } else {
4299       /* No parser has parsed this DIE. So, we need to skip the DIE,
4300          in order to read the next DIE.
4301          At the same time, establish sibling value if the DIE has one. */
4302       TRACE_D3("    uninteresting DIE -> skipping ...\n");
4303       skip_DIE (&sibling, c, abbv, cc);
4304    }
4305
4306    /* --- Now recurse into its children, if any
4307       and the parsing of the children is requested by a parser --- */
4308    if (has_children == DW_children_yes) {
4309       if (parse_children || sibling == 0) {
4310          if (0) TRACE_D3("BEGIN children of level %d\n", level);
4311          while (True) {
4312             atag = peek_ULEB128( c );
4313             if (atag == 0) break;
4314             read_DIE( rangestree, tyents, tempvars, gexprs,
4315                       typarser, varparser, inlparser,
4316                       c, td3, cc, level+1 );
4317          }
4318          /* Now we need to eat the terminating zero */
4319          atag = get_ULEB128( c );
4320          vg_assert(atag == 0);
4321          if (0) TRACE_D3("END children of level %d\n", level);
4322       } else {
4323          // We can skip the childrens, by jumping to the sibling
4324          TRACE_D3("    SKIPPING DIE's children,"
4325                   "jumping to sibling <%d><%lx>\n",
4326                   level, sibling);
4327          set_position_of_Cursor( c, sibling );
4328       }
4329    }
4330
4331 }
4332
4333 static void trace_debug_loc (const DebugInfo* di,
4334                              __attribute__((noreturn)) void (*barf)( const HChar* ),
4335                              DiSlice escn_debug_loc)
4336 {
4337 #if 0
4338    /* This doesn't work properly because it assumes all entries are
4339       packed end to end, with no holes.  But that doesn't always
4340       appear to be the case, so it loses sync.  And the D3 spec
4341       doesn't appear to require a no-hole situation either. */
4342    /* Display .debug_loc */
4343    Addr  dl_base;
4344    UWord dl_offset;
4345    Cursor loc; /* for showing .debug_loc */
4346    Bool td3 = di->trace_symtab;
4347
4348    TRACE_SYMTAB("\n");
4349    TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
4350    TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
4351    if (ML_(sli_is_valid)(escn_debug_loc)) {
4352       init_Cursor( &loc, escn_debug_loc, 0, barf,
4353                    "Overrun whilst reading .debug_loc section(1)" );
4354       dl_base = 0;
4355       dl_offset = 0;
4356       while (True) {
4357          UWord  w1, w2;
4358          UWord  len;
4359          if (is_at_end_Cursor( &loc ))
4360             break;
4361
4362          /* Read a (host-)word pair.  This is something of a hack since
4363             the word size to read is really dictated by the ELF file;
4364             however, we assume we're reading a file with the same
4365             word-sizeness as the host.  Reasonably enough. */
4366          w1 = get_UWord( &loc );
4367          w2 = get_UWord( &loc );
4368
4369          if (w1 == 0 && w2 == 0) {
4370             /* end of list.  reset 'base' */
4371             TRACE_D3("    %08lx <End of list>\n", dl_offset);
4372             dl_base = 0;
4373             dl_offset = get_position_of_Cursor( &loc );
4374             continue;
4375          }
4376
4377          if (w1 == -1UL) {
4378             /* new value for 'base' */
4379             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
4380                      dl_offset, w1, w2);
4381             dl_base = w2;
4382             continue;
4383          }
4384
4385          /* else a location expression follows */
4386          TRACE_D3("    %08lx %08lx %08lx ",
4387                   dl_offset, w1 + dl_base, w2 + dl_base);
4388          len = (UWord)get_UShort( &loc );
4389          while (len > 0) {
4390             UChar byte = get_UChar( &loc );
4391             TRACE_D3("%02x", (UInt)byte);
4392             len--;
4393          }
4394          TRACE_SYMTAB("\n");
4395       }
4396    }
4397 #endif
4398 }
4399
4400 static void trace_debug_ranges (const DebugInfo* di,
4401                                 __attribute__((noreturn)) void (*barf)( const HChar* ),
4402                                 DiSlice escn_debug_ranges)
4403 {
4404    Cursor ranges; /* for showing .debug_ranges */
4405    Addr  dr_base;
4406    UWord dr_offset;
4407    Bool td3 = di->trace_symtab;
4408
4409    /* Display .debug_ranges */
4410    TRACE_SYMTAB("\n");
4411    TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
4412    TRACE_SYMTAB("    Offset   Begin    End\n");
4413    if (ML_(sli_is_valid)(escn_debug_ranges)) {
4414       init_Cursor( &ranges, escn_debug_ranges, 0, barf,
4415                    "Overrun whilst reading .debug_ranges section(1)" );
4416       dr_base = 0;
4417       dr_offset = 0;
4418       while (True) {
4419          UWord  w1, w2;
4420
4421          if (is_at_end_Cursor( &ranges ))
4422             break;
4423
4424          /* Read a (host-)word pair.  This is something of a hack since
4425             the word size to read is really dictated by the ELF file;
4426             however, we assume we're reading a file with the same
4427             word-sizeness as the host.  Reasonably enough. */
4428          w1 = get_UWord( &ranges );
4429          w2 = get_UWord( &ranges );
4430
4431          if (w1 == 0 && w2 == 0) {
4432             /* end of list.  reset 'base' */
4433             TRACE_D3("    %08lx <End of list>\n", dr_offset);
4434             dr_base = 0;
4435             dr_offset = get_position_of_Cursor( &ranges );
4436             continue;
4437          }
4438
4439          if (w1 == -1UL) {
4440             /* new value for 'base' */
4441             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
4442                      dr_offset, w1, w2);
4443             dr_base = w2;
4444             continue;
4445          }
4446
4447          /* else a range [w1+base, w2+base) is denoted */
4448          TRACE_D3("    %08lx %08lx %08lx\n",
4449                   dr_offset, w1 + dr_base, w2 + dr_base);
4450       }
4451    }
4452 }
4453
4454 static void trace_debug_abbrev (const DebugInfo* di,
4455                                 __attribute__((noreturn)) void (*barf)( const HChar* ),
4456                                 DiSlice escn_debug_abbv)
4457 {
4458    Cursor abbv; /* for showing .debug_abbrev */
4459    Bool td3 = di->trace_symtab;
4460
4461    /* Display .debug_abbrev */
4462    TRACE_SYMTAB("\n");
4463    TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
4464    if (ML_(sli_is_valid)(escn_debug_abbv)) {
4465       init_Cursor( &abbv, escn_debug_abbv, 0, barf,
4466                    "Overrun whilst reading .debug_abbrev section" );
4467       while (True) {
4468          if (is_at_end_Cursor( &abbv ))
4469             break;
4470          /* Read one abbreviation table */
4471          TRACE_D3("  Number TAG\n");
4472          while (True) {
4473             ULong atag;
4474             UInt  has_children;
4475             ULong acode = get_ULEB128( &abbv );
4476             if (acode == 0) break; /* end of the table */
4477             atag = get_ULEB128( &abbv );
4478             has_children = get_UChar( &abbv );
4479             TRACE_D3("   %llu      %s    [%s]\n",
4480                      acode, ML_(pp_DW_TAG)(atag),
4481                             ML_(pp_DW_children)(has_children));
4482             while (True) {
4483                ULong at_name = get_ULEB128( &abbv );
4484                ULong at_form = get_ULEB128( &abbv );
4485                if (at_name == 0 && at_form == 0) break;
4486                TRACE_D3("    %-18s %s\n",
4487                         ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
4488             }
4489          }
4490       }
4491    }
4492 }
4493
4494 static
4495 void new_dwarf3_reader_wrk (
4496    DebugInfo* di,
4497    __attribute__((noreturn)) void (*barf)( const HChar* ),
4498    DiSlice escn_debug_info,      DiSlice escn_debug_types,
4499    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
4500    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
4501    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
4502    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
4503    DiSlice escn_debug_str_alt
4504 )
4505 {
4506    XArray* /* of TyEnt */     tyents = NULL;
4507    XArray* /* of TyEnt */     tyents_to_keep = NULL;
4508    XArray* /* of GExpr* */    gexprs = NULL;
4509    XArray* /* of TempVar* */  tempvars = NULL;
4510    WordFM* /* of (XArray* of AddrRange, void) */ rangestree = NULL;
4511    TyEntIndexCache* tyents_cache = NULL;
4512    TyEntIndexCache* tyents_to_keep_cache = NULL;
4513    TempVar *varp, *varp2;
4514    GExpr* gexpr;
4515    Cursor info; /* primary cursor for parsing .debug_info */
4516    D3TypeParser typarser;
4517    D3VarParser varparser;
4518    D3InlParser inlparser;
4519    Word  i, j, n;
4520    Bool td3 = di->trace_symtab;
4521    XArray* /* of TempVar* */ dioff_lookup_tab;
4522    Int pass;
4523    VgHashTable *signature_types = NULL;
4524
4525    /* Display/trace various information, if requested. */
4526    if (TD3) {
4527       trace_debug_loc    (di, barf, escn_debug_loc);
4528       trace_debug_ranges (di, barf, escn_debug_ranges);
4529       trace_debug_abbrev (di, barf, escn_debug_abbv);
4530       TRACE_SYMTAB("\n");
4531    }
4532
4533    /* Zero out all parsers. Parsers will really be initialised
4534       according to VG_(clo_read_*_info). */
4535    VG_(memset)( &inlparser, 0, sizeof(inlparser) );
4536
4537    if (VG_(clo_read_var_info)) {
4538       /* We'll park the harvested type information in here.  Also create
4539          a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
4540          have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
4541          huge and presumably will not occur in any valid DWARF3 file --
4542          it would need to have a .debug_info section 4GB long for that to
4543          happen.  These type entries end up in the DebugInfo. */
4544       tyents = VG_(newXA)( ML_(dinfo_zalloc),
4545                            "di.readdwarf3.ndrw.1 (TyEnt temp array)",
4546                            ML_(dinfo_free), sizeof(TyEnt) );
4547       { TyEnt tyent;
4548         VG_(memset)(&tyent, 0, sizeof(tyent));
4549         tyent.tag   = Te_TyVoid;
4550         tyent.cuOff = D3_FAKEVOID_CUOFF;
4551         tyent.Te.TyVoid.isFake = True;
4552         VG_(addToXA)( tyents, &tyent );
4553       }
4554       { TyEnt tyent;
4555         VG_(memset)(&tyent, 0, sizeof(tyent));
4556         tyent.tag   = Te_UNKNOWN;
4557         tyent.cuOff = D3_INVALID_CUOFF;
4558         VG_(addToXA)( tyents, &tyent );
4559       }
4560
4561       /* This is a tree used to unique-ify the range lists that are
4562          manufactured by parse_var_DIE.  References to the keys in the
4563          tree wind up in .rngMany fields in TempVars.  We'll need to
4564          delete this tree, and the XArrays attached to it, at the end of
4565          this function. */
4566       rangestree = VG_(newFM)( ML_(dinfo_zalloc),
4567                                "di.readdwarf3.ndrw.2 (rangestree)",
4568                                ML_(dinfo_free),
4569                                (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
4570
4571       /* List of variables we're accumulating.  These don't end up in the
4572          DebugInfo; instead their contents are handed to ML_(addVar) and
4573          the list elements are then deleted. */
4574       tempvars = VG_(newXA)( ML_(dinfo_zalloc),
4575                              "di.readdwarf3.ndrw.3 (TempVar*s array)",
4576                              ML_(dinfo_free),
4577                              sizeof(TempVar*) );
4578
4579       /* List of GExprs we're accumulating.  These wind up in the
4580          DebugInfo. */
4581       gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
4582                            ML_(dinfo_free), sizeof(GExpr*) );
4583
4584       /* We need a D3TypeParser to keep track of partially constructed
4585          types.  It'll be discarded as soon as we've completed the CU,
4586          since the resulting information is tipped in to 'tyents' as it
4587          is generated. */
4588       type_parser_init(&typarser);
4589
4590       var_parser_init(&varparser);
4591
4592       signature_types = VG_(HT_construct) ("signature_types");
4593    }
4594
4595    /* Do an initial pass to scan the .debug_types section, if any, and
4596       fill in the signatured types hash table.  This lets us handle
4597       mapping from a type signature to a (cooked) DIE offset directly
4598       in get_Form_contents.  */
4599    if (VG_(clo_read_var_info) && ML_(sli_is_valid)(escn_debug_types)) {
4600       init_Cursor( &info, escn_debug_types, 0, barf,
4601                    "Overrun whilst reading .debug_types section" );
4602       TRACE_D3("\n------ Collecting signatures from "
4603                ".debug_types section ------\n");
4604
4605       while (True) {
4606          UWord   cu_start_offset, cu_offset_now;
4607          CUConst cc;
4608
4609          cu_start_offset = get_position_of_Cursor( &info );
4610          TRACE_D3("\n");
4611          TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
4612          /* parse_CU_header initialises the CU's abbv hash table.  */
4613          parse_CU_Header( &cc, td3, &info, escn_debug_abbv, True, False );
4614
4615          /* Needed by cook_die.  */
4616          cc.types_cuOff_bias = escn_debug_info.szB;
4617
4618          record_signatured_type( signature_types, cc.type_signature,
4619                                  cook_die( &cc, cc.type_offset ));
4620
4621          /* Until proven otherwise we assume we don't need the icc9
4622             workaround in this case; see the DIE-reading loop below
4623             for details.  */
4624          cu_offset_now = (cu_start_offset + cc.unit_length
4625                           + (cc.is_dw64 ? 12 : 4));
4626
4627          clear_CUConst ( &cc);
4628
4629          if (cu_offset_now >= escn_debug_types.szB) {
4630             break;
4631          }
4632
4633          set_position_of_Cursor ( &info, cu_offset_now );
4634       }
4635    }
4636
4637    /* Perform three DIE-reading passes.  The first pass reads DIEs from
4638       alternate .debug_info (if any), the second pass reads DIEs from
4639       .debug_info, and the third pass reads DIEs from .debug_types.
4640       Moving the body of this loop into a separate function would
4641       require a large number of arguments to be passed in, so it is
4642       kept inline instead.  */
4643    for (pass = 0; pass < 3; ++pass) {
4644       ULong section_size;
4645
4646       if (pass == 0) {
4647          if (!ML_(sli_is_valid)(escn_debug_info_alt))
4648             continue;
4649          /* Now loop over the Compilation Units listed in the alternate
4650             .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
4651             Each compilation unit contains a Compilation Unit Header
4652             followed by precisely one DW_TAG_compile_unit or
4653             DW_TAG_partial_unit DIE. */
4654          init_Cursor( &info, escn_debug_info_alt, 0, barf,
4655                       "Overrun whilst reading alternate .debug_info section" );
4656          section_size = escn_debug_info_alt.szB;
4657
4658          TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
4659       } else if (pass == 1) {
4660          /* Now loop over the Compilation Units listed in the .debug_info
4661             section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
4662             unit contains a Compilation Unit Header followed by precisely
4663             one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
4664          init_Cursor( &info, escn_debug_info, 0, barf,
4665                       "Overrun whilst reading .debug_info section" );
4666          section_size = escn_debug_info.szB;
4667
4668          TRACE_D3("\n------ Parsing .debug_info section ------\n");
4669       } else {
4670          if (!ML_(sli_is_valid)(escn_debug_types))
4671             continue;
4672          if (!VG_(clo_read_var_info))
4673             continue; // Types not needed when only reading inline info.
4674          init_Cursor( &info, escn_debug_types, 0, barf,
4675                       "Overrun whilst reading .debug_types section" );
4676          section_size = escn_debug_types.szB;
4677
4678          TRACE_D3("\n------ Parsing .debug_types section ------\n");
4679       }
4680
4681       while (True) {
4682          ULong   cu_start_offset, cu_offset_now;
4683          CUConst cc;
4684          /* It may be that the stated size of this CU is larger than the
4685             amount of stuff actually in it.  icc9 seems to generate CUs
4686             thusly.  We use these variables to figure out if this is
4687             indeed the case, and if so how many bytes we need to skip to
4688             get to the start of the next CU.  Not skipping those bytes
4689             causes us to misidentify the start of the next CU, and it all
4690             goes badly wrong after that (not surprisingly). */
4691          UWord cu_size_including_IniLen, cu_amount_used;
4692
4693          /* It seems icc9 finishes the DIE info before debug_info_sz
4694             bytes have been used up.  So be flexible, and declare the
4695             sequence complete if there is not enough remaining bytes to
4696             hold even the smallest conceivable CU header.  (11 bytes I
4697             reckon). */
4698          /* JRS 23Jan09: I suspect this is no longer necessary now that
4699             the code below contains a 'while (cu_amount_used <
4700             cu_size_including_IniLen ...'  style loop, which skips over
4701             any leftover bytes at the end of a CU in the case where the
4702             CU's stated size is larger than its actual size (as
4703             determined by reading all its DIEs).  However, for prudence,
4704             I'll leave the following test in place.  I can't see that a
4705             CU header can be smaller than 11 bytes, so I don't think
4706             there's any harm possible through the test -- it just adds
4707             robustness. */
4708          Word avail = get_remaining_length_Cursor( &info );
4709          if (avail < 11) {
4710             if (avail > 0)
4711                TRACE_D3("new_dwarf3_reader_wrk: warning: "
4712                         "%ld unused bytes after end of DIEs\n", avail);
4713             break;
4714          }
4715
4716          if (VG_(clo_read_var_info)) {
4717             /* Check the varparser's stack is in a sane state. */
4718             vg_assert(varparser.sp == -1);
4719             /* Check the typarser's stack is in a sane state. */
4720             vg_assert(typarser.sp == -1);
4721          }
4722
4723          cu_start_offset = get_position_of_Cursor( &info );
4724          TRACE_D3("\n");
4725          TRACE_D3("  Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
4726          /* parse_CU_header initialises the CU's hashtable of abbvs ht_abbvs */
4727          if (pass == 0) {
4728             parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
4729                              False, True );
4730          } else {
4731             parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
4732                              pass == 2, False );
4733          }
4734          cc.escn_debug_str      = pass == 0 ? escn_debug_str_alt
4735                                             : escn_debug_str;
4736          cc.escn_debug_ranges   = escn_debug_ranges;
4737          cc.escn_debug_loc      = escn_debug_loc;
4738          cc.escn_debug_line     = pass == 0 ? escn_debug_line_alt
4739                                             : escn_debug_line;
4740          cc.escn_debug_info     = pass == 0 ? escn_debug_info_alt
4741                                             : escn_debug_info;
4742          cc.escn_debug_types    = escn_debug_types;
4743          cc.escn_debug_info_alt = escn_debug_info_alt;
4744          cc.escn_debug_str_alt  = escn_debug_str_alt;
4745          cc.types_cuOff_bias    = escn_debug_info.szB;
4746          cc.alt_cuOff_bias      = escn_debug_info.szB + escn_debug_types.szB;
4747          cc.cu_start_offset     = cu_start_offset;
4748          cc.di = di;
4749          /* The CU's svma can be deduced by looking at the AT_low_pc
4750             value in the top level TAG_compile_unit, which is the topmost
4751             DIE.  We'll leave it for the 'varparser' to acquire that info
4752             and fill it in -- since it is the only party to want to know
4753             it. */
4754          cc.cu_svma_known = False;
4755          cc.cu_svma       = 0;
4756
4757          if (VG_(clo_read_var_info)) {
4758             cc.signature_types = signature_types;
4759
4760             /* Create a fake outermost-level range covering the entire
4761                address range.  So we always have *something* to catch all
4762                variable declarations. */
4763             varstack_push( &cc, &varparser, td3,
4764                            unitary_range_list(0UL, ~0UL),
4765                            -1, False/*isFunc*/, NULL/*fbGX*/ );
4766
4767             /* And set up the fndn_ix_Table.  When we come across the top
4768                level DIE for this CU (which is what the next call to
4769                read_DIE should process) we will copy all the file names out
4770                of the .debug_line img area and use this table to look up the
4771                copies when we later see filename numbers in DW_TAG_variables
4772                etc. */
4773             vg_assert(!varparser.fndn_ix_Table );
4774             varparser.fndn_ix_Table
4775                = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5var",
4776                              ML_(dinfo_free),
4777                              sizeof(UInt) );
4778          }
4779
4780          if (VG_(clo_read_inline_info)) {
4781             /* fndn_ix_Table for the inlined call parser */
4782             vg_assert(!inlparser.fndn_ix_Table );
4783             inlparser.fndn_ix_Table
4784                = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5inl",
4785                              ML_(dinfo_free),
4786                              sizeof(UInt) );
4787          }
4788
4789          /* Now read the one-and-only top-level DIE for this CU. */
4790          vg_assert(!VG_(clo_read_var_info) || varparser.sp == 0);
4791          read_DIE( rangestree,
4792                    tyents, tempvars, gexprs,
4793                    &typarser, &varparser, &inlparser,
4794                    &info, td3, &cc, 0 );
4795
4796          cu_offset_now = get_position_of_Cursor( &info );
4797
4798          if (0) VG_(printf)("Travelled: %llu  size %llu\n",
4799                             cu_offset_now - cc.cu_start_offset,
4800                             cc.unit_length + (cc.is_dw64 ? 12 : 4));
4801
4802          /* How big the CU claims it is .. */
4803          cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
4804          /* .. vs how big we have found it to be */
4805          cu_amount_used = cu_offset_now - cc.cu_start_offset;
4806
4807          if (1) TRACE_D3("offset now %llu, d-i-size %llu\n",
4808                          cu_offset_now, section_size);
4809          if (cu_offset_now > section_size)
4810             barf("toplevel DIEs beyond end of CU");
4811
4812          /* If the CU is bigger than it claims to be, we've got a serious
4813             problem. */
4814          if (cu_amount_used > cu_size_including_IniLen)
4815             barf("CU's actual size appears to be larger than it claims it is");
4816
4817          /* If the CU is smaller than it claims to be, we need to skip some
4818             bytes.  Loop updates cu_offset_now and cu_amount_used. */
4819          while (cu_amount_used < cu_size_including_IniLen
4820                 && get_remaining_length_Cursor( &info ) > 0) {
4821             if (0) VG_(printf)("SKIP\n");
4822             (void)get_UChar( &info );
4823             cu_offset_now = get_position_of_Cursor( &info );
4824             cu_amount_used = cu_offset_now - cc.cu_start_offset;
4825          }
4826
4827          if (VG_(clo_read_var_info)) {
4828             /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
4829                anywhere else at all.  Our fake the-entire-address-space
4830                range is at level -1, so preening to -2 should completely
4831                empty the stack out. */
4832             TRACE_D3("\n");
4833             varstack_preen( &varparser, td3, -2 );
4834             /* Similarly, empty the type stack out. */
4835             typestack_preen( &typarser, td3, -2 );
4836          }
4837
4838          if (VG_(clo_read_var_info)) {
4839             vg_assert(varparser.fndn_ix_Table );
4840             VG_(deleteXA)( varparser.fndn_ix_Table );
4841             varparser.fndn_ix_Table = NULL;
4842          }
4843          if (VG_(clo_read_inline_info)) {
4844             vg_assert(inlparser.fndn_ix_Table );
4845             VG_(deleteXA)( inlparser.fndn_ix_Table );
4846             inlparser.fndn_ix_Table = NULL;
4847          }
4848          clear_CUConst(&cc);
4849
4850          if (cu_offset_now == section_size)
4851             break;
4852          /* else keep going */
4853       }
4854    }
4855
4856
4857    if (VG_(clo_read_var_info)) {
4858       /* From here on we're post-processing the stuff we got
4859          out of the .debug_info section. */
4860       if (TD3) {
4861          TRACE_D3("\n");
4862          ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
4863          TRACE_D3("\n");
4864          TRACE_D3("------ Compressing type entries ------\n");
4865       }
4866
4867       tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
4868                                         sizeof(TyEntIndexCache) );
4869       ML_(TyEntIndexCache__invalidate)( tyents_cache );
4870       dedup_types( td3, tyents, tyents_cache );
4871       if (TD3) {
4872          TRACE_D3("\n");
4873          ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
4874       }
4875
4876       TRACE_D3("\n");
4877       TRACE_D3("------ Resolving the types of variables ------\n" );
4878       resolve_variable_types( barf, tyents, tyents_cache, tempvars );
4879
4880       /* Copy all the non-INDIR tyents into a new table.  For large
4881          .so's, about 90% of the tyents will by now have been resolved to
4882          INDIRs, and we no longer need them, and so don't need to store
4883          them. */
4884       tyents_to_keep
4885          = VG_(newXA)( ML_(dinfo_zalloc),
4886                        "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
4887                        ML_(dinfo_free), sizeof(TyEnt) );
4888       n = VG_(sizeXA)( tyents );
4889       for (i = 0; i < n; i++) {
4890          TyEnt* ent = VG_(indexXA)( tyents, i );
4891          if (ent->tag != Te_INDIR)
4892             VG_(addToXA)( tyents_to_keep, ent );
4893       }
4894
4895       VG_(deleteXA)( tyents );
4896       tyents = NULL;
4897       ML_(dinfo_free)( tyents_cache );
4898       tyents_cache = NULL;
4899
4900       /* Sort tyents_to_keep so we can lookup in it.  A complete (if
4901          minor) waste of time, since tyents itself is sorted, but
4902          necessary since VG_(lookupXA) refuses to cooperate if we
4903          don't. */
4904       VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4905       VG_(sortXA)( tyents_to_keep );
4906
4907       /* Enable cacheing on tyents_to_keep */
4908       tyents_to_keep_cache
4909          = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
4910                               sizeof(TyEntIndexCache) );
4911       ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
4912
4913       /* And record the tyents in the DebugInfo.  We do this before
4914          starting to hand variables to ML_(addVar), since if ML_(addVar)
4915          wants to do debug printing (of the types of said vars) then it
4916          will need the tyents.*/
4917       vg_assert(!di->admin_tyents);
4918       di->admin_tyents = tyents_to_keep;
4919
4920       /* Bias all the location expressions. */
4921       TRACE_D3("\n");
4922       TRACE_D3("------ Biasing the location expressions ------\n" );
4923
4924       n = VG_(sizeXA)( gexprs );
4925       for (i = 0; i < n; i++) {
4926          gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
4927          bias_GX( gexpr, di );
4928       }
4929
4930       TRACE_D3("\n");
4931       TRACE_D3("------ Acquired the following variables: ------\n\n");
4932
4933       /* Park (pointers to) all the vars in an XArray, so we can look up
4934          abstract origins quickly.  The array is sorted (hence, looked-up
4935          by) the .dioff fields.  Since the .dioffs should be in strictly
4936          ascending order, there is no need to sort the array after
4937          construction.  The ascendingness is however asserted for. */
4938       dioff_lookup_tab
4939          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
4940                        ML_(dinfo_free),
4941                        sizeof(TempVar*) );
4942
4943       n = VG_(sizeXA)( tempvars );
4944       Word first_primary_var = 0;
4945       for (first_primary_var = 0;
4946            escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
4947            first_primary_var++) {
4948          varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
4949          if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
4950             break;
4951       }
4952       for (i = 0; i < n; i++) {
4953          varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
4954          if (i > first_primary_var) {
4955             varp2 = *(TempVar**)VG_(indexXA)( tempvars,
4956                                               (i + first_primary_var - 1) % n );
4957             /* why should this hold?  Only, I think, because we've
4958                constructed the array by reading .debug_info sequentially,
4959                and so the array .dioff fields should reflect that, and be
4960                strictly ascending. */
4961             vg_assert(varp2->dioff < varp->dioff);
4962          }
4963          VG_(addToXA)( dioff_lookup_tab, &varp );
4964       }
4965       VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
4966       VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
4967
4968       /* Now visit each var.  Collect up as much info as possible for
4969          each var and hand it to ML_(addVar). */
4970       n = VG_(sizeXA)( tempvars );
4971       for (j = 0; j < n; j++) {
4972          TyEnt* ent;
4973          varp = *(TempVar**)VG_(indexXA)( tempvars, j );
4974
4975          /* Possibly show .. */
4976          if (TD3) {
4977             VG_(printf)("<%lx> addVar: level %d: %s :: ",
4978                         varp->dioff,
4979                         varp->level,
4980                         varp->name ? varp->name : "<anon_var>" );
4981             if (varp->typeR) {
4982                ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
4983             } else {
4984                VG_(printf)("NULL");
4985             }
4986             VG_(printf)("\n  Loc=");
4987             if (varp->gexpr) {
4988                ML_(pp_GX)(varp->gexpr);
4989             } else {
4990                VG_(printf)("NULL");
4991             }
4992             VG_(printf)("\n");
4993             if (varp->fbGX) {
4994                VG_(printf)("  FrB=");
4995                ML_(pp_GX)( varp->fbGX );
4996                VG_(printf)("\n");
4997             } else {
4998                VG_(printf)("  FrB=none\n");
4999             }
5000             VG_(printf)("  declared at: %u %s:%d\n",
5001                         varp->fndn_ix,
5002                         ML_(fndn_ix2filename) (di, varp->fndn_ix),
5003                         varp->fLine );
5004             if (varp->absOri != (UWord)D3_INVALID_CUOFF)
5005                VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
5006          }
5007
5008          /* Skip variables which have no location.  These must be
5009             abstract instances; they are useless as-is since with no
5010             location they have no specified memory location.  They will
5011             presumably be referred to via the absOri fields of other
5012             variables. */
5013          if (!varp->gexpr) {
5014             TRACE_D3("  SKIP (no location)\n\n");
5015             continue;
5016          }
5017
5018          /* So it has a location, at least.  If it refers to some other
5019             entry through its absOri field, pull in further info through
5020             that. */
5021          if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
5022             Bool found;
5023             Word ixFirst, ixLast;
5024             TempVar key;
5025             TempVar* keyp = &key;
5026             TempVar *varAI;
5027             VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
5028             key.dioff = varp->absOri; /* this is what we want to find */
5029             found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
5030                                    &ixFirst, &ixLast );
5031             if (!found) {
5032                /* barf("DW_AT_abstract_origin can't be resolved"); */
5033                TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
5034                continue;
5035             }
5036             /* If the following fails, there is more than one entry with
5037                the same dioff.  Which can't happen. */
5038             vg_assert(ixFirst == ixLast);
5039             varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
5040             /* stay sane */
5041             vg_assert(varAI);
5042             vg_assert(varAI->dioff == varp->absOri);
5043
5044             /* Copy what useful info we can. */
5045             if (varAI->typeR && !varp->typeR)
5046                varp->typeR = varAI->typeR;
5047             if (varAI->name && !varp->name)
5048                varp->name = varAI->name;
5049             if (varAI->fndn_ix && !varp->fndn_ix)
5050                varp->fndn_ix = varAI->fndn_ix;
5051             if (varAI->fLine > 0 && varp->fLine == 0)
5052                varp->fLine = varAI->fLine;
5053          }
5054
5055          /* Give it a name if it doesn't have one. */
5056          if (!varp->name)
5057             varp->name = ML_(addStr)( di, "<anon_var>", -1 );
5058
5059          /* So now does it have enough info to be useful? */
5060          /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
5061             the type didn't get resolved.  Really, in that case
5062             something's broken earlier on, and should be fixed, rather
5063             than just skipping the variable. */
5064          ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
5065                                             tyents_to_keep_cache,
5066                                             varp->typeR );
5067          /* The next two assertions should be guaranteed by
5068             our previous call to resolve_variable_types. */
5069          vg_assert(ent);
5070          vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
5071
5072          if (ent->tag == Te_UNKNOWN) continue;
5073
5074          vg_assert(varp->gexpr);
5075          vg_assert(varp->name);
5076          vg_assert(varp->typeR);
5077          vg_assert(varp->level >= 0);
5078
5079          /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
5080             each address range in which the variable exists. */
5081          TRACE_D3("  ACQUIRE for range(s) ");
5082          { AddrRange  oneRange;
5083            AddrRange* varPcRanges;
5084            Word       nVarPcRanges;
5085            /* Set up to iterate over address ranges, however
5086               represented. */
5087            if (varp->nRanges == 0 || varp->nRanges == 1) {
5088               vg_assert(!varp->rngMany);
5089               if (varp->nRanges == 0) {
5090                  vg_assert(varp->rngOneMin == 0);
5091                  vg_assert(varp->rngOneMax == 0);
5092               }
5093               nVarPcRanges = varp->nRanges;
5094               oneRange.aMin = varp->rngOneMin;
5095               oneRange.aMax = varp->rngOneMax;
5096               varPcRanges = &oneRange;
5097            } else {
5098               vg_assert(varp->rngMany);
5099               vg_assert(varp->rngOneMin == 0);
5100               vg_assert(varp->rngOneMax == 0);
5101               nVarPcRanges = VG_(sizeXA)(varp->rngMany);
5102               vg_assert(nVarPcRanges >= 2);
5103               vg_assert(nVarPcRanges == (Word)varp->nRanges);
5104               varPcRanges = VG_(indexXA)(varp->rngMany, 0);
5105            }
5106            if (varp->level == 0)
5107               vg_assert( nVarPcRanges == 1 );
5108            /* and iterate */
5109            for (i = 0; i < nVarPcRanges; i++) {
5110               Addr pcMin = varPcRanges[i].aMin;
5111               Addr pcMax = varPcRanges[i].aMax;
5112               vg_assert(pcMin <= pcMax);
5113               /* Level 0 is the global address range.  So at level 0 we
5114                  don't want to bias pcMin/pcMax; but at all other levels
5115                  we do since those are derived from svmas in the Dwarf
5116                  we're reading.  Be paranoid ... */
5117               if (varp->level == 0) {
5118                  vg_assert(pcMin == (Addr)0);
5119                  vg_assert(pcMax == ~(Addr)0);
5120               } else {
5121                  /* vg_assert(pcMin > (Addr)0);
5122                     No .. we can legitimately expect to see ranges like
5123                     0x0-0x11D (pre-biasing, of course). */
5124                  vg_assert(pcMax < ~(Addr)0);
5125               }
5126
5127               /* Apply text biasing, for non-global variables. */
5128               if (varp->level > 0) {
5129                  pcMin += di->text_debug_bias;
5130                  pcMax += di->text_debug_bias;
5131               }
5132
5133               if (i > 0 && (i%2) == 0)
5134                  TRACE_D3("\n                       ");
5135               TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
5136
5137               ML_(addVar)(
5138                  di, varp->level,
5139                      pcMin, pcMax,
5140                      varp->name,  varp->typeR,
5141                      varp->gexpr, varp->fbGX,
5142                      varp->fndn_ix, varp->fLine, td3
5143               );
5144            }
5145          }
5146
5147          TRACE_D3("\n\n");
5148          /* and move on to the next var */
5149       }
5150
5151       /* Now free all the TempVars */
5152       n = VG_(sizeXA)( tempvars );
5153       for (i = 0; i < n; i++) {
5154          varp = *(TempVar**)VG_(indexXA)( tempvars, i );
5155          ML_(dinfo_free)(varp);
5156       }
5157       VG_(deleteXA)( tempvars );
5158       tempvars = NULL;
5159
5160       /* and the temp lookup table */
5161       VG_(deleteXA)( dioff_lookup_tab );
5162
5163       /* and the ranges tree.  Note that we need to also free the XArrays
5164          which constitute the keys, hence pass VG_(deleteXA) as a
5165          key-finalizer. */
5166       VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
5167
5168       /* and the tyents_to_keep cache */
5169       ML_(dinfo_free)( tyents_to_keep_cache );
5170       tyents_to_keep_cache = NULL;
5171
5172       vg_assert( varparser.fndn_ix_Table == NULL );
5173
5174       /* And the signatured type hash.  */
5175       VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
5176
5177       /* record the GExprs in di so they can be freed later */
5178       vg_assert(!di->admin_gexprs);
5179       di->admin_gexprs = gexprs;
5180    }
5181
5182    // Free up dynamically allocated memory
5183    if (VG_(clo_read_var_info)) {
5184       type_parser_release(&typarser);
5185       var_parser_release(&varparser);
5186    }
5187 }
5188
5189
5190 /*------------------------------------------------------------*/
5191 /*---                                                      ---*/
5192 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
5193 /*---                                                      ---*/
5194 /*------------------------------------------------------------*/
5195
/* Error-recovery state shared by barf() and ML_(new_dwarf3_reader).
   d3rd_jmpbuf_valid:  set to True just before the VG_MINIMAL_SETJMP in
      ML_(new_dwarf3_reader) and back to False once control has
      returned there; barf() asserts it before jumping.
   d3rd_jmpbuf_reason: the message most recently handed to barf();
      reset to NULL at the end of ML_(new_dwarf3_reader).
   d3rd_jmpbuf:        the jump buffer itself, used by
      VG_MINIMAL_SETJMP and VG_MINIMAL_LONGJMP. */
5196 static Bool               d3rd_jmpbuf_valid  = False;
5197 static const HChar*       d3rd_jmpbuf_reason = NULL;
5198 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
5199
/* Abort the DWARF3 read currently in progress.  Stashes 'reason' in
   d3rd_jmpbuf_reason and long-jumps to d3rd_jmpbuf, so this function
   does not return to its caller.  Only the pointer is stored (no copy
   is made), so 'reason' has to stay valid until
   ML_(new_dwarf3_reader) has reported it. */
5200 static __attribute__((noreturn)) void barf ( const HChar* reason ) {
        /* Jumping is only legal while a setjmp context is live. */
5201    vg_assert(d3rd_jmpbuf_valid);
        /* Record the reason before jumping; the landing site asserts
           that it is non-NULL. */
5202    d3rd_jmpbuf_reason = reason;
5203    VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
5204    /*NOTREACHED*/
        /* Backstop in case the long-jump above somehow falls through. */
5205    vg_assert(0);
5206 }
5207
5208
/* Top-level entry point of the "new" DWARF3 reader.

   Runs new_dwarf3_reader_wrk on the supplied sections inside a
   setjmp/longjmp guard.  The worker is given barf() as its failure
   callback; if barf() is called, control comes back here via
   d3rd_jmpbuf and the recorded reason is reported through
   ML_(symerr).  Either way the function returns normally.

   di          -- the DebugInfo being populated; di->trace_symtab is
                  copied into the local 'td3'.
   escn_debug_* -- the debug section slices, handed to the worker
                  unchanged and in the same order.

   The asserts on entry require that no other read is in progress, so
   a nested call made while the worker is running would fail them.

   NOTE(review): nothing on the failure path here releases whatever the
   worker had allocated before it bailed out -- confirm whether that is
   intentional. */
5209 void
5210 ML_(new_dwarf3_reader) (
5211    DebugInfo* di,
5212    DiSlice escn_debug_info,      DiSlice escn_debug_types,
5213    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
5214    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
5215    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
5216    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
5217    DiSlice escn_debug_str_alt
5218 )
5219 {
        /* Both locals are live across the setjmp/longjmp pair; they are
           presumably declared volatile so that their values remain
           well-defined after a long-jump -- TODO confirm.  'td3' is not
           referenced by name below; presumably the TRACE_D3 macro reads
           it -- TODO confirm. */
5220    volatile Int  jumped;
5221    volatile Bool td3 = di->trace_symtab;
5222
5223    /* Run the _wrk function to read the dwarf3.  If it succeeds, it
5224       just returns normally.  If there is any failure, it longjmp's
5225       back here, having first set d3rd_jmpbuf_reason to something
5226       useful. */
5227    vg_assert(d3rd_jmpbuf_valid  == False);
5228    vg_assert(d3rd_jmpbuf_reason == NULL);
5229
        /* Arm the jump buffer: from here until it is disarmed below,
           barf() is allowed to long-jump back to this point. */
5230    d3rd_jmpbuf_valid = True;
5231    jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
5232    if (jumped == 0) {
5233       /* try this ... */
5234       new_dwarf3_reader_wrk( di, barf,
5235                              escn_debug_info,     escn_debug_types,
5236                              escn_debug_abbv,     escn_debug_line,
5237                              escn_debug_str,      escn_debug_ranges,
5238                              escn_debug_loc,      escn_debug_info_alt,
5239                              escn_debug_abbv_alt, escn_debug_line_alt,
5240                              escn_debug_str_alt );
           /* The worker returned normally, so disarm the jump buffer. */
5241       d3rd_jmpbuf_valid = False;
5242       TRACE_D3("\n------ .debug_info reading was successful ------\n");
5243    } else {
5244       /* It longjmp'd. */
5245       d3rd_jmpbuf_valid = False;
5246       /* Can't longjump without giving some sort of reason. */
5247       vg_assert(d3rd_jmpbuf_reason != NULL);
5248
5249       TRACE_D3("\n------ .debug_info reading failed ------\n");
5250
           /* Report the reason that barf() recorded before jumping. */
5251       ML_(symerr)(di, True, d3rd_jmpbuf_reason);
5252    }
5253
        /* Restore the idle state that the entry asserts expect.  The
           'valid' flag has already been cleared on both branches above;
           only the reason still needs resetting. */
5254    d3rd_jmpbuf_valid  = False;
5255    d3rd_jmpbuf_reason = NULL;
5256 }
5257
5258
5259
5260 /* --- Unused code fragments which might be useful one day. --- */
5261
5262 #if 0
5263    /* Read the arange tables; 'aranges' is not declared in this fragment. */
5264    TRACE_SYMTAB("\n");
5265    TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
5266    init_Cursor( &aranges, debug_aranges_img,
5267                 debug_aranges_sz, 0, barf,
5268                 "Overrun whilst reading .debug_aranges section" );
5269    while (True) {
5270       ULong  len, d_i_offset;
5271       Bool   is64;
5272       UShort version;
5273       UChar  asize, segsize;
5274       /* One pass of this loop handles one table: header, then pairs. */
5275       if (is_at_end_Cursor( &aranges ))
5276          break;
5277       /* Read one arange thingy */
5278       /* initial_length field */
5279       len = get_Initial_Length( &is64, &aranges,
5280                "in .debug_aranges: invalid initial-length field" );
5281       version    = get_UShort( &aranges );
5282       d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
5283       asize      = get_UChar( &aranges );
5284       segsize    = get_UChar( &aranges );
5285       TRACE_D3("  Length:                   %llu\n", len);
5286       TRACE_D3("  Version:                  %d\n", (Int)version);
5287       TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
5288       TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
5289       TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
5290       TRACE_D3("\n");
5291       TRACE_D3("    Address            Length\n");
5292       /* Skip to a multiple of 2*asize.  NOTE(review): asize==0 => % by 0. */
5293       while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
5294          (void)get_UChar( & aranges );
5295       }
5296       while (True) { /* (address, length) pairs */
5297          ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
5298          ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
5299          TRACE_D3("    0x%016llx 0x%llx\n", address, length);
5300          if (address == 0 && length == 0) break; /* (0,0) ends the list */
5301       }
5302    }
5303    TRACE_SYMTAB("\n");
5304 #endif
5305
5306 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
5307
5308 /*--------------------------------------------------------------------*/
5309 /*--- end                                                          ---*/
5310 /*--------------------------------------------------------------------*/