coregrind/m_debuginfo/readdwarf3.c

   1 /* -*- mode: C; c-basic-offset: 3; -*- */
   2
   3 /*--------------------------------------------------------------------*/
   4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
   5 /*---                                                 readdwarf3.c ---*/
   6 /*--------------------------------------------------------------------*/
   7
   8 /*
   9    This file is part of Valgrind, a dynamic binary instrumentation
  10    framework.
  11
  12    Copyright (C) 2008-2017 OpenWorks LLP
  13       info@open-works.co.uk
  14
  15    This program is free software; you can redistribute it and/or
  16    modify it under the terms of the GNU General Public License as
  17    published by the Free Software Foundation; either version 2 of the
  18    License, or (at your option) any later version.
  19
  20    This program is distributed in the hope that it will be useful, but
  21    WITHOUT ANY WARRANTY; without even the implied warranty of
  22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  23    General Public License for more details.
  24
  25    You should have received a copy of the GNU General Public License
  26    along with this program; if not, see <http://www.gnu.org/licenses/>.
  27
  28    The GNU General Public License is contained in the file COPYING.
  29
  30    Neither the names of the U.S. Department of Energy nor the
  31    University of California nor the names of its contributors may be
  32    used to endorse or promote products derived from this software
  33    without prior written permission.
  34 */
  35
  36 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_freebsd)
  37
  38 /* REFERENCE (without which this code will not make much sense):
  39
  40    DWARF Debugging Information Format, Version 3,
  41    dated 20 December 2005 (the "D3 spec").
  42
  43    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
  44    .doc (MS Word) version, but for some reason the section numbers
  45    between the Word and PDF versions differ by 1 in the first digit.
  46    All section references in this code are to the PDF version.
  47
  48    CURRENT HACKS:
  49
  50    DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
  51       assumed to mean "const void" or "volatile void" respectively.
  52       GDB appears to interpret them like this, anyway.
  53
  54    In many cases it is important to know the svma of a CU (the "base
  55    address of the CU", as the D3 spec calls it).  There are some
  56    situations in which the spec implies this value is unknown, but the
  57    Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
  58    merely zero when not explicitly stated.  So we too have to make
  59    that assumption.
  60
  61    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
  62    unitary_range_list() bias the resulting range list in the same way
  63    that its more general cousin, get_range_list(), does?  I don't
  64    know.
  65
  66    TODO, 2008 Feb 17:
  67
  68    get rid of cu_svma_known and document the assumed-zero svma hack.
  69
  70    ML_(sizeOfType): differentiate between zero sized types and types
  71    for which the size is unknown.  Is this important?  I don't know.
  72
  73    DW_TAG_array_types: deal with explicit sizes (currently we compute
  74    the size from the bounds and the element size, although that's
  75    fragile, if the bounds are incompletely specified, or completely
  76    absent)
  77
  78    Document reason for difference (by 1) of stack preening depth in
  79    parse_var_DIE vs parse_type_DIE.
  80
  81    Don't hand to ML_(addVars), vars whose locations are entirely in
  82    registers (DW_OP_reg*).  This is merely a space-saving
  83    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
  84    expressions correctly, by failing to evaluate them and hence
  85    effectively ignoring the variable with which they are associated.
  86
  87    Deal with DW_TAG_array_types which have element size != stride
  88
  89    In some cases, the info for a variable is split between two
  90    different DIEs (generally a declarer and a definer).  We punt on
  91    these.  Could do better here.
  92
  93    The 'data_bias' argument passed to the expression evaluator
  94    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
  95    MaybeUWord, to make it clear when we do vs don't know what it is
  96    for the evaluation of an expression.  At the moment zero is passed
  97    for this parameter in the don't know case.  That's a bit fragile
  98    and obscure; using a MaybeUWord would be clearer.
  99
 100    POTENTIAL PERFORMANCE IMPROVEMENTS:
 101
 102    Currently, duplicate removal and all other queries for the type
 103    entities array is done using cuOffset-based pointing, which
 104    involves a binary search (VG_(lookupXA)) for each access.  This is
 105    wildly inefficient, although simple.  It would be better to
 106    translate all the cuOffset-based references (iow, all the "R" and
 107    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
 108    'tyents' right at the start of dedup_types(), and use direct
 109    indexing (VG_(indexXA)) wherever possible after that.
 110
 111    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
 112    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
 113    points, and possibly also make an _UNCHECKED version which skips
 114    the range checks in performance-critical situations such as this.
 115
 116    Handle interaction between read_DIE and parse_{var,type}_DIE
 117    better.  Currently read_DIE reads the entire DIE just to find where
 118    the end is (and for debug printing), so that it can later reliably
 119    move the cursor to the end regardless of what parse_{var,type}_DIE
 120    do.  This means many DIEs (most, even?) are read twice.  It would
 121    be smarter to make parse_{var,type}_DIE return a Bool indicating
 122    whether or not they advanced the DIE cursor, and only if they
 123    didn't should read_DIE itself read through the DIE.
 124
 125    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
 126    zero variables in their .vars XArray.  Rather than have an XArray
 127    with zero elements (which uses 2 malloc'd blocks), allow the .vars
 128    pointer to be NULL in this case.
 129
 130    More generally, reduce the amount of memory allocated and freed
 131    while reading Dwarf3 type/variable information.  Even modest (20MB)
 132    objects cause this module to allocate and free hundreds of
 133    thousands of small blocks, and ML_(arena_malloc) and its various
 134    groupies always show up at the top of performance profiles. */
 135
 136 #include "pub_core_basics.h"
 137 #include "pub_core_debuginfo.h"
 138 #include "pub_core_libcbase.h"
 139 #include "pub_core_libcassert.h"
 140 #include "pub_core_libcprint.h"
 141 #include "pub_core_libcsetjmp.h"   // setjmp facilities
 142 #include "pub_core_hashtable.h"
 143 #include "pub_core_options.h"
 144 #include "pub_core_tooliface.h"    /* VG_(needs) */
 145 #include "pub_core_xarray.h"
 146 #include "pub_core_wordfm.h"
 147 #include "priv_misc.h"             /* dinfo_zalloc/free */
 148 #include "priv_image.h"
 149 #include "priv_tytypes.h"
 150 #include "priv_d3basics.h"
 151 #include "priv_storage.h"
 152 #include "priv_readdwarf3.h"       /* self */
 153
 154
 155 /*------------------------------------------------------------*/
 156 /*---                                                      ---*/
 157 /*--- Basic machinery for parsing DIEs.                    ---*/
 158 /*---                                                      ---*/
 159 /*------------------------------------------------------------*/
 160
 161 #define TRACE_D3(format, args...) \
 162    if (UNLIKELY(td3)) { VG_(printf)(format, ## args); }
 163 #define TD3 (UNLIKELY(td3))
 164
 165 #define D3_INVALID_CUOFF  ((UWord)(-1UL))
 166 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
 167
 168 typedef
 169    struct {
 170       DiSlice sli;      // to which this cursor applies
 171       DiOffT  sli_next; // offset in underlying DiImage; must be >= sli.ioff
 172       void (*barf)( const HChar* ) __attribute__((noreturn));
 173       const HChar* barfstr;
 174    }
 175    Cursor;
 176
 177 static inline Bool is_sane_Cursor ( const Cursor* c ) {
 178    if (!c)                return False;
 179    if (!c->barf)          return False;
 180    if (!c->barfstr)       return False;
 181    if (!ML_(sli_is_valid)(c->sli))    return False;
 182    if (c->sli.ioff == DiOffT_INVALID) return False;
 183    if (c->sli_next < c->sli.ioff)     return False;
 184    return True;
 185 }
 186
 187 // Initialise a cursor from a DiSlice (ELF section, really) so as to
 188 // start reading at offset |sli_initial_offset| from the start of the
 189 // slice.
 190 static void init_Cursor ( /*OUT*/Cursor* c,
 191                           DiSlice sli,
 192                           ULong   sli_initial_offset,
 193                           __attribute__((noreturn)) void (*barf)(const HChar*),
 194                           const HChar* barfstr )
 195 {
 196    vg_assert(c);
 197    VG_(bzero_inline)(c, sizeof(*c));
 198    c->sli              = sli;
 199    c->sli_next         = c->sli.ioff + sli_initial_offset;
 200    c->barf             = barf;
 201    c->barfstr          = barfstr;
 202    vg_assert(is_sane_Cursor(c));
 203 }
 204
 205 static Bool is_at_end_Cursor ( const Cursor* c ) {
 206    vg_assert(is_sane_Cursor(c));
 207    return c->sli_next >= c->sli.ioff + c->sli.szB;
 208 }
 209
 210 static inline ULong get_position_of_Cursor ( const Cursor* c ) {
 211    vg_assert(is_sane_Cursor(c));
 212    return c->sli_next - c->sli.ioff;
 213 }
 214 static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
 215    c->sli_next = c->sli.ioff + pos;
 216    vg_assert(is_sane_Cursor(c));
 217 }
 218 static inline void advance_position_of_Cursor ( Cursor* c, ULong delta ) {
 219    c->sli_next += delta;
 220    vg_assert(is_sane_Cursor(c));
 221 }
 222
 223 static /*signed*/Long get_remaining_length_Cursor ( const Cursor* c ) {
 224    vg_assert(is_sane_Cursor(c));
 225    return c->sli.ioff + c->sli.szB - c->sli_next;
 226 }
 227
 228 //static void* get_address_of_Cursor ( Cursor* c ) {
 229 //   vg_assert(is_sane_Cursor(c));
 230 //   return &c->region_start_img[ c->region_next ];
 231 //}
 232
 233 static DiCursor get_DiCursor_from_Cursor ( const Cursor* c ) {
 234    return mk_DiCursor(c->sli.img, c->sli_next);
 235 }
 236
 237 /* FIXME: document assumptions on endianness for
 238    get_UShort/UInt/ULong. */
 239 static inline UChar get_UChar ( Cursor* c ) {
 240    UChar r;
 241    vg_assert(is_sane_Cursor(c));
 242    if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
 243       c->barf(c->barfstr);
 244       /*NOTREACHED*/
 245       vg_assert(0);
 246    }
 247    r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
 248    c->sli_next += sizeof(UChar);
 249    return r;
 250 }
 251 static UShort get_UShort ( Cursor* c ) {
 252    UShort r;
 253    vg_assert(is_sane_Cursor(c));
 254    if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
 255       c->barf(c->barfstr);
 256       /*NOTREACHED*/
 257       vg_assert(0);
 258    }
 259    r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
 260    c->sli_next += sizeof(UShort);
 261    return r;
 262 }
 263 static UInt get_UInt ( Cursor* c ) {
 264    UInt r;
 265    vg_assert(is_sane_Cursor(c));
 266    if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
 267       c->barf(c->barfstr);
 268       /*NOTREACHED*/
 269       vg_assert(0);
 270    }
 271    r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
 272    c->sli_next += sizeof(UInt);
 273    return r;
 274 }
 275 static ULong get_ULong ( Cursor* c ) {
 276    ULong r;
 277    vg_assert(is_sane_Cursor(c));
 278    if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
 279       c->barf(c->barfstr);
 280       /*NOTREACHED*/
 281       vg_assert(0);
 282    }
 283    r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
 284    c->sli_next += sizeof(ULong);
 285    return r;
 286 }
 287 static ULong get_ULEB128 ( Cursor* c ) {
 288    ULong result;
 289    Int   shift;
 290    UChar byte;
 291    /* unroll first iteration */
 292    byte = get_UChar( c );
 293    result = (ULong)(byte & 0x7f);
 294    if (LIKELY(!(byte & 0x80))) return result;
 295    shift = 7;
 296    /* end unroll first iteration */
 297    do {
 298       byte = get_UChar( c );
 299       result |= ((ULong)(byte & 0x7f)) << shift;
 300       shift += 7;
 301    } while (byte & 0x80);
 302    return result;
 303 }
 304 static Long get_SLEB128 ( Cursor* c ) {
 305    ULong  result = 0;
 306    Int    shift = 0;
 307    UChar  byte;
 308    do {
 309       byte = get_UChar(c);
 310       result |= ((ULong)(byte & 0x7f)) << shift;
 311       shift += 7;
 312    } while (byte & 0x80);
 313    if (shift < 64 && (byte & 0x40))
 314       result |= -(1ULL << shift);
 315    return result;
 316 }
 317 static UInt get_UInt3 ( Cursor* c ) {
 318    UChar c1, c2, c3;
 319    vg_assert(is_sane_Cursor(c));
 320    if (c->sli_next + 3 > c->sli.ioff + c->sli.szB) {
 321       c->barf(c->barfstr);
 322       /*NOTREACHED*/
 323       vg_assert(0);
 324    }
 325    c1 = ML_(img_get_UChar)(c->sli.img, c->sli_next);
 326    c2 = ML_(img_get_UChar)(c->sli.img, c->sli_next+1);
 327    c3 = ML_(img_get_UChar)(c->sli.img, c->sli_next+2);
 328    c->sli_next += 3;
 329 #if defined(VG_BIGENDIAN)
 330    return c1 << 16 | c2 << 8 | c3;
 331 #else
 332    return c1 | c2 << 8 | c3 << 16;
 333 #endif
 334 }
 335
 336
 337 /* Assume 'c' points to the start of a string.  Return a DiCursor of
 338    whatever it points at, and advance it past the terminating zero.
 339    This makes it safe for the caller to then copy the string with
 340    ML_(addStr), since (w.r.t. image overruns) the process of advancing
 341    past the terminating zero will already have "vetted" the string. */
 342 static DiCursor get_AsciiZ ( Cursor* c ) {
 343    UChar uc;
 344    DiCursor res = get_DiCursor_from_Cursor(c);
 345    do { uc = get_UChar(c); } while (uc != 0);
 346    return res;
 347 }
 348
 349 static ULong peek_ULEB128 ( Cursor* c ) {
 350    DiOffT here = c->sli_next;
 351    ULong  r    = get_ULEB128( c );
 352    c->sli_next = here;
 353    return r;
 354 }
 355 static UChar peek_UChar ( Cursor* c ) {
 356    DiOffT here = c->sli_next;
 357    UChar  r    = get_UChar( c );
 358    c->sli_next = here;
 359    return r;
 360 }
 361
 362 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
 363    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
 364 }
 365
 366 static UWord get_UWord ( Cursor* c ) {
 367    vg_assert(sizeof(UWord) == sizeof(void*));
 368    if (sizeof(UWord) == 4) return get_UInt(c);
 369    if (sizeof(UWord) == 8) return get_ULong(c);
 370    vg_assert(0);
 371 }
 372
 373 /* Read a DWARF3 'Initial Length' field */
 374 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
 375                                   Cursor* c,
 376                                   const HChar* barfMsg )
 377 {
 378    ULong w64;
 379    UInt  w32;
 380    *is64 = False;
 381    w32 = get_UInt( c );
 382    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
 383       c->barf( barfMsg );
 384    }
 385    else if (w32 == 0xFFFFFFFF) {
 386       *is64 = True;
 387       w64   = get_ULong( c );
 388    } else {
 389       *is64 = False;
 390       w64 = (ULong)w32;
 391    }
 392    return w64;
 393 }
 394
 395
 396 /*------------------------------------------------------------*/
 397 /*---                                                      ---*/
 398 /*--- "CUConst" structure                                  ---*/
 399 /*---                                                      ---*/
 400 /*------------------------------------------------------------*/
 401
 402 typedef
 403    struct _name_form {
 404       ULong at_name;  // Dwarf Attribute name
 405       ULong at_form;  // Dwarf Attribute form
 406       Long  at_val;   // Dwarf Attribute value (for implicit_const)
 407       UInt  skip_szB; // Nr of bytes skippable from here ...
 408       UInt  next_nf;  // ... to reach this attr/form index in the g_abbv.nf
 409    } name_form;
 410 /* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
 411    Each name_form maintains how many (fixed) nr of bytes can be skipped from
 412    the beginning of this form till the next attr/form to look at.
 413    The next form to look at can be:
 414        an 'interesting' attr/form to read while skipping a DIE
 415           (currently, this is only DW_AT_sibling)
 416    or
 417        a variable length form which must be read to be skipped.
 418    For a variable length form, the skip_szB will be equal to VARSZ_FORM.
 419
 420    Note: this technique could also be used to speed up the parsing
 421    of DIEs : for each parser kind, we could have the nr of bytes
 422    to skip to directly reach the interesting form(s) for the parser. */
 423
 424 typedef
 425    struct _g_abbv {
 426       struct _g_abbv *next; // read/write by hash table.
 427       UWord  abbv_code;     // key, read by hash table
 428       ULong  atag;
 429       ULong  has_children;
 430       name_form nf[0];
 431       /* Variable-length array of name/form pairs, terminated
 432          by a 0/0 pair.
 433          The skip_szB/next_nf allows to skip efficiently a DIE
 434          described by this g_abbv; */
 435     } g_abbv;
 436
 437 /* Holds information about the .debug_abbrev section for this CU.  The current
 438   Cursor into the abbrev section, the known abbrev codes are but into an hash
 439   table.  The (starting) offset into the abbrev_offset can be used to check
 440   whether the abbv can be shared between CUs.  The done boolean is set when all
 441   known codes have been read.  Initialize a new abbv_state with init_ht_abbvs.
 442   To read any new abbrev codes not yet in the hash table call find_ht_abbvs
 443   (get_abbv will first query the ht_abbvs, then if not done, call
 444   find_ht_abbvs).  */
 445 typedef
 446    struct _abbv_state {
 447       Cursor c; /* Current cursor into .debug_abbrev.  */
 448       VgHashTable *ht_abbvs; /* Hash table mapping codes to abbrevs.  */
 449       ULong debug_abbrev_offset; /* Starting offset into .debug_abbrev.  */
 450       Bool done; /* Whether there (might) still be new abbrev codes not yet
 451                     in the cache.  */
 452    } abbv_state;
 453
 454 /* Holds information that is constant through the parsing of a
 455    Compilation Unit.  This is basically plumbed through to
 456    everywhere. */
 457 typedef
 458    struct {
 459       /* Call here if anything goes wrong */
 460       void (*barf)( const HChar* ) __attribute__((noreturn));
 461       /* Is this 64-bit DWARF ? */
 462       Bool   is_dw64;
 463       /* Which DWARF version ?  (2, 3, 4 or 5) */
 464       UShort version;
 465       /* Length of this Compilation Unit, as stated in the
 466          .unit_length :: InitialLength field of the CU Header.
 467          However, this size (as specified by the D3 spec) does not
 468          include the size of the .unit_length field itself, which is
 469          either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
 470          can be obtained through the expression ".is_dw64 ? 12 : 4". */
 471       ULong  unit_length;
 472       /* Offset of start of this unit in .debug_info */
 473       UWord  cu_start_offset;
 474       /* SVMA for this CU.  In the D3 spec, is known as the "base
 475          address of the compilation unit (last para sec 3.1.1).
 476          Needed for (amongst things) interpretation of location-list
 477          values. */
 478       Addr   cu_svma;
 479       Bool   cu_svma_known;
 480
 481       /* The debug_abbreviations table to be used for this Unit */
 482       //UChar* debug_abbv;
 483       /* Upper bound on size thereof (an overestimate, in general) */
 484       //UWord  debug_abbv_maxszB;
 485       /* A bounded area of the image, to be used as the
 486          debug_abbreviations table tobe used for this Unit. */
 487       DiSlice debug_abbv;
 488
 489       /* Image information for various sections. */
 490       DiSlice escn_debug_str;
 491       DiSlice escn_debug_ranges;
 492       DiSlice escn_debug_rnglists;
 493       DiSlice escn_debug_loclists;
 494       DiSlice escn_debug_loc;
 495       DiSlice escn_debug_line;
 496       DiSlice escn_debug_info;
 497       DiSlice escn_debug_types;
 498       DiSlice escn_debug_info_alt;
 499       DiSlice escn_debug_str_alt;
 500       DiSlice escn_debug_line_str;
 501       DiSlice escn_debug_addr;
 502       DiSlice escn_debug_str_offsets;
 503       /* How much to add to .debug_types resp. alternate .debug_info offsets
 504          in cook_die*.  */
 505       UWord  types_cuOff_bias;
 506       UWord  alt_cuOff_bias;
 507       /* DW_AT_addr_base */
 508       Addr   cu_addr_base;
 509       Bool   cu_has_addr_base;
 510       /* DW_AT_str_offsets_base */
 511       Addr   cu_str_offsets_base;
 512       Bool   cu_has_str_offsets_base;
 513       /* DW_AT_rnglists_base */
 514       Addr   cu_rnglists_base;
 515       Bool   cu_has_rnglists_base;
 516       /* DW_AT_loclists_base */
 517       Addr   cu_loclists_base;
 518       Bool   cu_has_loclists_base;
 519       /* --- Needed so we can add stuff to the string table. --- */
 520       struct _DebugInfo* di;
 521       /* --- State of the hash table of g_abbv (i.e. parsed abbreviations)
 522              technically makes this struct not const.  --- */
 523       abbv_state abbv;
 524
 525       /* True if this came from .debug_types; otherwise it came from
 526          .debug_info.  */
 527       Bool is_type_unit;
 528       /* For a unit coming from .debug_types, these hold the TU's type
 529          signature and the uncooked DIE offset of the TU's signatured
 530          type.  For a unit coming from .debug_info, these are unused.  */
 531       ULong type_signature;
 532       ULong type_offset;
 533
 534       /* Signatured type hash; computed once and then shared by all
 535          CUs.  */
 536       VgHashTable *signature_types;
 537
 538       /* True if this came from alternate .debug_info; otherwise
 539          it came from normal .debug_info or .debug_types.  */
 540       Bool is_alt_info;
 541    }
 542    CUConst;
 543
 544
 545 /* Return the cooked value of DIE depending on whether CC represents a
 546    .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
 547    .debug_types and optional alternate .debug_info sections form
 548    a contiguous whole, so that DIEs coming from .debug_types are numbered
 549    starting at the end of .debug_info and DIEs coming from alternate
 550    .debug_info are numbered starting at the end of .debug_types.  */
 551 static UWord cook_die( const CUConst* cc, UWord die )
 552 {
 553    if (cc->is_type_unit)
 554       die += cc->types_cuOff_bias;
 555    else if (cc->is_alt_info)
 556       die += cc->alt_cuOff_bias;
 557    return die;
 558 }
 559
 560 /* Like cook_die, but understand that DIEs coming from a
 561    DW_FORM_ref_sig8 reference are already cooked.  Also, handle
 562    DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
 563    as reference to alternate .debug_info.  */
 564 static UWord cook_die_using_form( const CUConst *cc, UWord die, DW_FORM form)
 565 {
 566    if (form == DW_FORM_ref_sig8)
 567       return die;
 568    if (form == DW_FORM_GNU_ref_alt)
 569       return die + cc->alt_cuOff_bias;
 570    return cook_die( cc, die );
 571 }
 572
 573 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
 574    came from the .debug_types section and *ALT_FLAG to true if the DIE
 575    came from alternate .debug_info section.  */
 576 static UWord uncook_die( const CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
 577                          Bool *alt_flag )
 578 {
 579    *alt_flag = False;
 580    *type_flag = False;
 581    /* The use of escn_debug_{info,types}.szB seems safe to me even if
 582       escn_debug_{info,types} are DiSlice_INVALID (meaning the
 583       sections were not found), because DiSlice_INVALID.szB is always
 584       zero.  That said, it seems unlikely we'd ever get here if
 585       .debug_info or .debug_types were missing. */
 586    if (die >= cc->escn_debug_info.szB) {
 587       if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
 588          *alt_flag = True;
 589          die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
 590       } else {
 591          *type_flag = True;
 592          die -= cc->escn_debug_info.szB;
 593       }
 594    }
 595    return die;
 596 }
 597
 598 /* Return an entry from .debug_addr with the given index.
 599    Call one of the variants below that do error-checking. */
 600 static ULong get_debug_addr_entry_common( ULong index, const CUConst* cc )
 601 {
 602    vg_assert(cc->cu_has_addr_base);
 603    /* We make the same word-size assumption as DW_FORM_addr. */
 604    UWord addr_pos = cc->cu_addr_base + index * sizeof(UWord);
 605    Cursor cur;
 606    init_Cursor( &cur, cc->escn_debug_addr, addr_pos, cc->barf,
 607                 "get_debug_addr_entry_common: index points outside .debug_addr" );
 608    return (ULong)(UWord)get_UWord(&cur);
 609 }
 610
 611 static ULong get_debug_addr_entry_form( ULong index, const CUConst* cc,
 612                                         DW_FORM form )
 613 {
 614    if(!cc->cu_has_addr_base) {
 615       VG_(printf)(
 616          "get_debug_addr_entry_form: %u (%s) without DW_AT_addr_base\n",
 617          form, ML_(pp_DW_FORM)(form));
 618       cc->barf("get_debug_addr_entry_form: DW_AT_addr_base not set");
 619    }
 620    return get_debug_addr_entry_common( index, cc );
 621 }
 622
 623 static ULong get_debug_addr_entry_lle( ULong index, const CUConst* cc,
 624                                        DW_LLE entry )
 625 {
 626    if(!cc->cu_has_addr_base) {
 627       VG_(printf)(
 628          "get_debug_addr_entry_lle: %u (%s) without DW_AT_addr_base\n",
 629          entry, ML_(pp_DW_LLE)(entry));
 630       cc->barf("get_debug_addr_entry_lle: DW_AT_addr_base not set");
 631    }
 632    return get_debug_addr_entry_common( index, cc );
 633 }
 634
 635 static ULong get_debug_addr_entry_rle( ULong index, const CUConst* cc,
 636                                        DW_RLE entry )
 637 {
 638    if(!cc->cu_has_addr_base) {
 639       VG_(printf)(
 640          "get_debug_addr_entry_rle: %u (%s) without DW_AT_addr_base\n",
 641          entry, ML_(pp_DW_RLE)(entry));
 642       cc->barf("get_debug_addr_entry_rle: DW_AT_addr_base not set");
 643    }
 644    return get_debug_addr_entry_common( index, cc );
 645 }
 646
 647 /*------------------------------------------------------------*/
 648 /*---                                                      ---*/
 649 /*--- Helper functions for Guarded Expressions             ---*/
 650 /*---                                                      ---*/
 651 /*------------------------------------------------------------*/
 652
 653 /* Parse the location list starting at img-offset 'debug_loc_offset'
 654    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
 655    and so I believe are correct SVMAs for the object as a whole.  This
 656    function allocates the UChar*, and the caller must deallocate it.
 657    The resulting block is in so-called Guarded-Expression format.
 658
 659    Guarded-Expression format is similar but not identical to the DWARF3
 660    location-list format.  The format of each returned block is:
 661
 662       UChar biasMe;
 663       UChar isEnd;
 664       followed by zero or more of
 665
 666       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
 667
 668    '..bytes..' is a standard DWARF3 location expression which is
 669    valid when aMin <= pc <= aMax (possibly after suitable biasing).
 670
 671    The number of bytes in '..bytes..' is nbytes.
 672
 673    The end of the sequence is marked by an isEnd == 1 value.  All
 674    previous isEnd values must be zero.
 675
 676    biasMe is 1 if the aMin/aMax fields need this DebugInfo's
 677    text_bias added before use, and 0 if this is not necessary
 678    (the GX is ready to go).
 679
 680    Hence the block can be quickly parsed and is self-describing.  Note
 681    that aMax is 1 less than the corresponding value in a DWARF3
 682    location list.  Zero length ranges, with aMax == aMin-1, are not
 683    allowed.
 684 */
 685 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
 686    it more logically belongs. */
 687
 688
 689 /* Apply a text bias to a GX. */
 690 static void bias_GX ( /*MOD*/GExpr* gx, const DebugInfo* di )
 691 {
 692    UShort nbytes;
 693    UChar* p = &gx->payload[0];
 694    UChar* pA;
 695    UChar  uc;
 696    uc = *p++; /*biasMe*/
 697    if (uc == 0)
 698       return;
 699    vg_assert(uc == 1);
 700    p[-1] = 0; /* mark it as done */
 701    while (True) {
 702       uc = *p++;
 703       if (uc == 1)
 704          break; /*isEnd*/
 705       vg_assert(uc == 0);
 706       /* t-bias aMin */
 707       pA = (UChar*)p;
 708       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
 709       p += sizeof(Addr);
 710       /* t-bias aMax */
 711       pA = (UChar*)p;
 712       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
 713       p += sizeof(Addr);
 714       /* nbytes, and actual expression */
 715       nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
 716       p += nbytes;
 717    }
 718 }
 719
 720 __attribute__((noinline))
 721 static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
 722 {
 723    SizeT  bytesReqd;
 724    GExpr* gx;
 725    UChar *p, *pstart;
 726
 727    vg_assert(sizeof(UWord) == sizeof(Addr));
 728    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
 729    bytesReqd
 730       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
 731         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
 732         + sizeof(UShort) /*nbytes*/    + (SizeT)nbytes
 733         + sizeof(UChar); /*isEnd*/
 734
 735    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
 736                            sizeof(GExpr) + bytesReqd );
 737
 738    p = pstart = &gx->payload[0];
 739
 740    p = ML_(write_UChar)(p, 0);        /*biasMe*/
 741    p = ML_(write_UChar)(p, 0);        /*!isEnd*/
 742    p = ML_(write_Addr)(p, 0);         /*aMin*/
 743    p = ML_(write_Addr)(p, ~0);        /*aMax*/
 744    p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
 745    ML_(cur_read_get)(p, block, nbytes); p += nbytes;
 746    p = ML_(write_UChar)(p, 1);        /*isEnd*/
 747
 748    vg_assert( (SizeT)(p - pstart) == bytesReqd);
 749    vg_assert( &gx->payload[bytesReqd]
 750               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
 751
 752    return gx;
 753 }
 754
 755 __attribute__((noinline))
 756 static GExpr* make_general_GX ( const CUConst* cc,
 757                                 Bool     td3,
 758                                 ULong    offset,
 759                                 Addr     svma_of_referencing_CU )
 760 {
 761    Bool      done;
 762    Addr      base;
 763    Cursor    loc;
 764    XArray*   xa; /* XArray of UChar */
 765    GExpr*    gx;
 766    Word      nbytes;
 767    Bool      addBase = cc->version < 5;
 768
 769    vg_assert(sizeof(UWord) == sizeof(Addr));
 770    if (cc->version < 5 && (!ML_(sli_is_valid)(cc->escn_debug_loc)
 771                            || cc->escn_debug_loc.szB == 0))
 772       cc->barf("make_general_GX: .debug_loc is empty/missing");
 773    if (cc->version >= 5 && (!ML_(sli_is_valid)(cc->escn_debug_loclists)
 774                            || cc->escn_debug_loclists.szB == 0))
 775       cc->barf("make_general_GX: .debug_loclists is empty/missing");
 776
 777    if (cc->version < 5)
 778       init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
 779                    "Overrun whilst reading .debug_loc section(2)" );
 780    else
 781       init_Cursor( &loc, cc->escn_debug_loclists, 0, cc->barf,
 782                    "Overrun whilst reading .debug_loclists section(2)" );
 783    set_position_of_Cursor( &loc, offset );
 784
 785    TRACE_D3("make_general_GX (offset = %llu, ioff = %llu) {\n",
 786             offset, get_DiCursor_from_Cursor(&loc).ioff );
 787
 788    /* Who frees this xa?  It is freed before this fn exits. */
 789    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
 790                     ML_(dinfo_free),
 791                     sizeof(UChar) );
 792
 793    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
 794
 795    base = 0;
 796    done = False;
 797    while (!done) {
 798       Bool  acquire;
 799       UWord len;
 800       UWord w1;
 801       UWord w2;
 802       if (cc->version < 5) {
 803          /* Read a (host-)word pair.  This is something of a hack since
 804             the word size to read is really dictated by the ELF file;
 805             however, we assume we're reading a file with the same
 806             word-sizeness as the host.  Reasonably enough. */
 807          w1 = get_UWord( &loc );
 808          w2 = get_UWord( &loc );
 809
 810          TRACE_D3("   %08lx %08lx\n", w1, w2);
 811          if (w1 == 0 && w2 == 0) {
 812             done = True;
 813             break; /* end of list */
 814          }
 815
 816          if (w1 == -1UL) {
 817             /* new value for 'base' */
 818             base = w2;
 819             continue;
 820          }
 821          /* else a location expression follows */
 822          len = (UWord)get_UShort( &loc );
 823       } else {
 824          w1 = 0;
 825          w2 = 0;
 826          len = 0;
 827          DW_LLE r = get_UChar( &loc );
 828          switch (r) {
 829          case DW_LLE_end_of_list:
 830             done = True;
 831             break;
 832          case DW_LLE_base_address:
 833             base = get_UWord( &loc );
 834             break;
 835          case DW_LLE_start_length:
 836             w1 = get_UWord( &loc );
 837             w2 = w1 + get_ULEB128( &loc );
 838             len = get_ULEB128( &loc );
 839             break;
 840          case DW_LLE_offset_pair:
 841             w1 = base + get_ULEB128( &loc );
 842             w2 = base + get_ULEB128( &loc );
 843             len = get_ULEB128( &loc );
 844             break;
 845          case DW_LLE_start_end:
 846             w1 = get_UWord ( &loc );
 847             w2 = get_UWord ( &loc );
 848             len = get_ULEB128( &loc );
 849             break;
 850          case DW_LLE_GNU_view_pair:
 851             get_ULEB128( &loc );
 852             get_ULEB128( &loc );
 853             break;
 854          case DW_LLE_base_addressx:
 855             base = get_debug_addr_entry_lle( get_ULEB128( &loc ), cc,
 856                                              DW_LLE_base_addressx );
 857             break;
 858          case DW_LLE_startx_endx:
 859             w1 = get_debug_addr_entry_lle( get_ULEB128( &loc ), cc,
 860                                            DW_LLE_startx_endx );
 861             w2 = get_debug_addr_entry_lle( get_ULEB128( &loc ), cc,
 862                                            DW_LLE_startx_endx );
 863             len = get_ULEB128( &loc );
 864             break;
 865          case DW_LLE_startx_length:
 866             w1 = get_debug_addr_entry_lle( get_ULEB128( &loc ), cc,
 867                                            DW_LLE_startx_length );
 868             w2 = w1 + get_ULEB128( &loc );
 869             len = get_ULEB128( &loc );
 870             break;
 871          case DW_LLE_default_location:
 872          default:
 873             cc->barf( "Unhandled or unknown loclists entry" );
 874             done = True;
 875          }
 876       }
 877
 878       /* else enumerate [w1+base, w2+base) */
 879       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
 880          (sec 2.17.2) */
 881       if (w1 > w2) {
 882          TRACE_D3("negative range is for .debug_loc expr at "
 883                   "file offset %llu\n",
 884                   offset);
 885          cc->barf( "negative range in .debug_loc section" );
 886       }
 887
 888       /* ignore zero length ranges */
 889       acquire = w1 < w2;
 890
 891       if (acquire) {
 892          UWord  w;
 893          UShort s;
 894          UChar  c;
 895          c = 0; /* !isEnd*/
 896          VG_(addBytesToXA)( xa, &c, sizeof(c) );
 897          w = w1    + (addBase ? base : 0) + svma_of_referencing_CU;
 898          VG_(addBytesToXA)( xa, &w, sizeof(w) );
 899          w = w2 -1 + (addBase ? base : 0) + svma_of_referencing_CU;
 900          VG_(addBytesToXA)( xa, &w, sizeof(w) );
 901          s = (UShort)len;
 902          VG_(addBytesToXA)( xa, &s, sizeof(s) );
 903       }
 904
 905       while (len > 0) {
 906          UChar byte = get_UChar( &loc );
 907          TRACE_D3("%02x", (UInt)byte);
 908          if (acquire)
 909             VG_(addBytesToXA)( xa, &byte, 1 );
 910          len--;
 911       }
 912       TRACE_D3("\n");
 913    }
 914
 915    { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
 916
 917    nbytes = VG_(sizeXA)( xa );
 918    vg_assert(nbytes >= 1);
 919
 920    gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
 921    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
 922    vg_assert( &gx->payload[nbytes]
 923               == ((UChar*)gx) + sizeof(GExpr) + nbytes );
 924
 925    VG_(deleteXA)( xa );
 926
 927    TRACE_D3("}\n");
 928
 929    return gx;
 930 }
 931
 932
 933 /*------------------------------------------------------------*/
 934 /*---                                                      ---*/
 935 /*--- Helper functions for range lists and CU headers      ---*/
 936 /*---                                                      ---*/
 937 /*------------------------------------------------------------*/
 938
 939 /* Denotes an address range.  Both aMin and aMax are included in the
 940    range; hence a complete range is (0, ~0) and an empty range is any
 941    (X, X-1) for X > 0.*/
 942 typedef
 943    struct { Addr aMin; Addr aMax; }
 944    AddrRange;
 945
 946
 947 /* Generate an arbitrary structural total ordering on
 948    XArray* of AddrRange. */
 949 static Word cmp__XArrays_of_AddrRange ( const XArray* rngs1,
 950                                         const XArray* rngs2 )
 951 {
 952    Word n1, n2, i;
 953    vg_assert(rngs1 && rngs2);
 954    n1 = VG_(sizeXA)( rngs1 );
 955    n2 = VG_(sizeXA)( rngs2 );
 956    if (n1 < n2) return -1;
 957    if (n1 > n2) return 1;
 958    for (i = 0; i < n1; i++) {
 959       AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
 960       AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
 961       if (rng1->aMin < rng2->aMin) return -1;
 962       if (rng1->aMin > rng2->aMin) return 1;
 963       if (rng1->aMax < rng2->aMax) return -1;
 964       if (rng1->aMax > rng2->aMax) return 1;
 965    }
 966    return 0;
 967 }
 968
 969
 970 __attribute__((noinline))
 971 static XArray* /* of AddrRange */ empty_range_list ( void )
 972 {
 973    XArray* xa; /* XArray of AddrRange */
 974    /* Who frees this xa?  varstack_preen() does. */
 975    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
 976                     ML_(dinfo_free),
 977                     sizeof(AddrRange) );
 978    return xa;
 979 }
 980
 981
 982 __attribute__((noinline))
 983 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
 984 {
 985    XArray*   xa;
 986    AddrRange pair;
 987    vg_assert(aMin <= aMax);
 988    /* Who frees this xa?  varstack_preen() does. */
 989    xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
 990                     ML_(dinfo_free),
 991                     sizeof(AddrRange) );
 992    pair.aMin = aMin;
 993    pair.aMax = aMax;
 994    VG_(addToXA)( xa, &pair );
 995    return xa;
 996 }
 997
 998
 999 /* Enumerate the address ranges starting at img-offset
1000    'debug_ranges_offset' in .debug_ranges.  Results are biased with
1001    'svma_of_referencing_CU' and so I believe are correct SVMAs for the
1002    object as a whole.  This function allocates the XArray, and the
1003    caller must deallocate it. */
1004 __attribute__((noinline))
1005 static XArray* /* of AddrRange */
1006 get_range_list ( const CUConst* cc,
1007                  Bool     td3,
1008                  UWord    debug_ranges_offset,
1009                  Addr     svma_of_referencing_CU )
1010 {
1011    Addr      base;
1012    Cursor    ranges;
1013    XArray*   xa; /* XArray of AddrRange */
1014    AddrRange pair;
1015
1016    if (cc->version < 5 && (!ML_(sli_is_valid)(cc->escn_debug_ranges)
1017                            || cc->escn_debug_ranges.szB == 0))
1018       cc->barf("get_range_list: .debug_ranges is empty/missing");
1019    if (cc->version >= 5 && (!ML_(sli_is_valid)(cc->escn_debug_rnglists)
1020                             || cc->escn_debug_rnglists.szB == 0))
1021       cc->barf("get_range_list: .debug_rnglists is empty/missing");
1022
1023    if (cc->version < 5)
1024       init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf,
1025                    "Overrun whilst reading .debug_ranges section(2)" );
1026    else
1027       init_Cursor( &ranges, cc->escn_debug_rnglists, 0, cc->barf,
1028                    "Overrun whilst reading .debug_rnglists section(2)" );
1029
1030    set_position_of_Cursor( &ranges, debug_ranges_offset );
1031
1032    /* Who frees this xa?  varstack_preen() does. */
1033    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
1034                     sizeof(AddrRange) );
1035    base = 0;
1036    if (cc->version < 5) {
1037       while (True) {
1038          /* Read a (host-)word pair.  This is something of a hack since
1039             the word size to read is really dictated by the ELF file;
1040             however, we assume we're reading a file with the same
1041             word-sizeness as the host.  Reasonably enough. */
1042          UWord w1 = get_UWord( &ranges );
1043          UWord w2 = get_UWord( &ranges );
1044
1045          if (w1 == 0 && w2 == 0)
1046             break; /* end of list. */
1047
1048          if (w1 == -1UL) {
1049             /* new value for 'base' */
1050             base = w2;
1051             continue;
1052          }
1053
1054          /* else enumerate [w1+base, w2+base) */
1055          /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
1056             (sec 2.17.2) */
1057          if (w1 > w2)
1058             cc->barf( "negative range in .debug_ranges section" );
1059          if (w1 < w2) {
1060             pair.aMin = w1     + base + svma_of_referencing_CU;
1061             pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
1062             vg_assert(pair.aMin <= pair.aMax);
1063             VG_(addToXA)( xa, &pair );
1064          }
1065       }
1066    } else {
1067       Bool done = False;
1068       while (!done) {
1069          UWord w1 = 0;
1070          UWord w2 = 0;
1071          DW_RLE r = get_UChar( &ranges );
1072          switch (r) {
1073          case DW_RLE_end_of_list:
1074             done = True;
1075             break;
1076          case DW_RLE_base_address:
1077             base = get_UWord( &ranges );
1078             break;
1079          case DW_RLE_start_length:
1080             w1 = get_UWord( &ranges );
1081             w2 = w1 + get_ULEB128( &ranges );
1082             break;
1083          case DW_RLE_offset_pair:
1084             w1 = base + get_ULEB128( &ranges );
1085             w2 = base + get_ULEB128( &ranges );
1086             break;
1087          case DW_RLE_start_end:
1088             w1 = get_UWord ( &ranges );
1089             w2 = get_UWord ( &ranges );
1090             break;
1091          case DW_RLE_base_addressx:
1092             base = get_debug_addr_entry_rle( get_ULEB128( &ranges ), cc,
1093                                              DW_RLE_base_addressx );
1094             break;
1095          case DW_RLE_startx_endx:
1096             w1 = get_debug_addr_entry_rle( get_ULEB128( &ranges ), cc,
1097                                            DW_RLE_startx_endx );
1098             w2 = get_debug_addr_entry_rle( get_ULEB128( &ranges ), cc,
1099                                            DW_RLE_startx_endx );
1100             break;
1101          case DW_RLE_startx_length:
1102             w1 = get_debug_addr_entry_rle( get_ULEB128( &ranges ), cc,
1103                                            DW_RLE_startx_length );
1104             w2 = w1 + get_ULEB128( &ranges );
1105             break;
1106          default:
1107             cc->barf( "Unhandled or unknown range list entry" );
1108             done = True;
1109          }
1110          if (w1 > w2)
1111             cc->barf( "negative range in .debug_rnglists section" );
1112          if (w1 < w2) {
1113             pair.aMin = w1     + svma_of_referencing_CU;
1114             pair.aMax = w2 - 1 + svma_of_referencing_CU;
1115             vg_assert(pair.aMin <= pair.aMax);
1116             VG_(addToXA)( xa, &pair );
1117          }
1118       }
1119    }
1120    return xa;
1121 }
1122
/* Sentinel size that the abbreviation skip-table code compares against
   the result of get_Form_szB.  Presumably it is what get_Form_szB
   returns for a form whose encoded size is not fixed -- TODO confirm
   against the definition of get_Form_szB, which is not in this part
   of the file. */
#define VARSZ_FORM 0xffffffff
/* Forward declaration, so that code below can call get_Form_szB. */
static UInt get_Form_szB (const CUConst* cc, DW_FORM form );
1125
1126 /* Initialises the hash table of abbreviations.  This only sets up the abbv
1127    Cursor and hash table, but does not try to read any abbrevs yes. The actual
1128    reading of abbrevs will be done by get_abbv by calling find_ht_abbvs on
1129    demand if a requested abbrev code isn't in the hash table yet. When using the
1130    inline parser a lot of abbrevs will not be needed so reading everything
1131    upfront will often waste time and memory.  */
1132 static void init_ht_abbvs (CUConst* cc, ULong debug_abbrev_offset,
1133                            Bool td3)
1134 {
1135    Cursor *c = &cc->abbv.c;
1136    init_Cursor( c, cc->debug_abbv, 0, cc->barf,
1137                "Overrun whilst parsing .debug_abbrev section(2)" );
1138    cc->abbv.ht_abbvs = VG_(HT_construct) ("di.readdwarf3.ht_abbvs");
1139    cc->abbv.debug_abbrev_offset = debug_abbrev_offset;
1140    cc->abbv.done = False;
1141 }
1142
1143 static g_abbv *find_ht_abbvs (CUConst* cc, ULong abbv_code,
1144                               Bool td3)
1145 {
1146    Cursor *c;
1147    g_abbv *ta; // temporary abbreviation, reallocated if needed.
1148    UInt ta_nf_maxE; // max nr of pairs in ta.nf[], doubled when reallocated.
1149    UInt ta_nf_n;    // nr of pairs in ta->nf that are initialised.
1150    g_abbv *ht_ta; // abbv to insert in hash table.
1151    Int i;
1152
1153    #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
1154
1155    ta_nf_maxE = 10; // starting with enough for 9 pairs+terminating pair.
1156    ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE));
1157
1158    c = &cc->abbv.c;
1159    while (True) {
1160       ht_ta = NULL;
1161       ta->abbv_code = get_ULEB128( c );
1162       if (ta->abbv_code == 0) {
1163          cc->abbv.done = True;
1164          break; /* end of the table */
1165       }
1166
1167       ta->atag = get_ULEB128( c );
1168       ta->has_children = get_UChar( c );
1169       ta_nf_n = 0;
1170       while (True) {
1171          if (ta_nf_n >= ta_nf_maxE) {
1172             g_abbv *old_ta = ta;
1173             ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf",
1174                                     SZ_G_ABBV(2 * ta_nf_maxE));
1175             ta_nf_maxE = 2 * ta_nf_maxE;
1176             VG_(memcpy) (ta, old_ta, SZ_G_ABBV(ta_nf_n));
1177             ML_(dinfo_free) (old_ta);
1178          }
1179          ta->nf[ta_nf_n].at_name = get_ULEB128( c );
1180          ta->nf[ta_nf_n].at_form = get_ULEB128( c );
1181          if (ta->nf[ta_nf_n].at_form == DW_FORM_implicit_const)
1182             ta->nf[ta_nf_n].at_val = get_SLEB128( c );
1183          if (ta->nf[ta_nf_n].at_name == 0 && ta->nf[ta_nf_n].at_form == 0) {
1184             ta_nf_n++;
1185             break;
1186          }
1187         ta_nf_n++;
1188       }
1189
1190       // Initialises the skip_szB/next_nf elements : an element at position
1191       // i must contain the sum of its own size + the sizes of all elements
1192       // following i till either the next variable size element, the next
1193       // sibling element or the end of the DIE.
1194       ta->nf[ta_nf_n - 1].skip_szB = 0;
1195       ta->nf[ta_nf_n - 1].next_nf = 0;
1196       for (i = ta_nf_n - 2; i >= 0; i--) {
1197          const UInt form_szB = get_Form_szB (cc, (DW_FORM)ta->nf[i].at_form);
1198
1199          if (ta->nf[i+1].at_name == DW_AT_sibling
1200              || ta->nf[i+1].skip_szB == VARSZ_FORM) {
1201             ta->nf[i].skip_szB = form_szB;
1202             ta->nf[i].next_nf  = i+1;
1203          } else if (form_szB == VARSZ_FORM) {
1204             ta->nf[i].skip_szB = form_szB;
1205             ta->nf[i].next_nf  = i+1;
1206          } else {
1207             ta->nf[i].skip_szB = ta->nf[i+1].skip_szB + form_szB;
1208             ta->nf[i].next_nf  = ta->nf[i+1].next_nf;
1209          }
1210       }
1211
1212       ht_ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n));
1213       VG_(memcpy) (ht_ta, ta, SZ_G_ABBV(ta_nf_n));
1214       VG_(HT_add_node) ( cc->abbv.ht_abbvs, ht_ta );
1215       if (TD3) {
1216          TRACE_D3("  Adding abbv_code %lu TAG  %s [%s] nf %u ",
1217                   ht_ta->abbv_code, ML_(pp_DW_TAG)(ht_ta->atag),
1218                   ML_(pp_DW_children)(ht_ta->has_children),
1219                   ta_nf_n);
1220          TRACE_D3("  ");
1221          for (i = 0; i < ta_nf_n; i++)
1222             TRACE_D3("[%u,%u] ", ta->nf[i].skip_szB, ta->nf[i].next_nf);
1223          TRACE_D3("\n");
1224       }
1225       if (ht_ta->abbv_code == abbv_code)
1226          break;
1227    }
1228
1229    ML_(dinfo_free) (ta);
1230    #undef SZ_G_ABBV
1231
1232    return ht_ta;
1233 }
1234
1235 static g_abbv* get_abbv (CUConst* cc, ULong abbv_code,
1236                          Bool td3)
1237 {
1238    g_abbv *abbv;
1239
1240    abbv = VG_(HT_lookup) (cc->abbv.ht_abbvs, abbv_code);
1241    if (!abbv && !cc->abbv.done)
1242       abbv = find_ht_abbvs (cc, abbv_code, td3);
1243    if (!abbv)
1244       cc->barf ("abbv_code not found in ht_abbvs table");
1245
1246    return abbv;
1247 }
1248
1249 /* Parse the Compilation Unit header indicated at 'c' and
1250    initialise 'cc' accordingly. */
1251 static __attribute__((noinline))
1252 void parse_CU_Header ( /*OUT*/CUConst* cc,
1253                        Bool td3,
1254                        Cursor* c,
1255                        DiSlice escn_debug_abbv,
1256                        abbv_state last_abbv,
1257                        Bool type_unit,
1258                        Bool alt_info )
1259 {
1260    UChar  address_size, unit_type;
1261    ULong  debug_abbrev_offset;
1262
1263    VG_(memset)(cc, 0, sizeof(*cc));
1264    vg_assert(c && c->barf);
1265    cc->barf = c->barf;
1266
1267    /* initial_length field */
1268    cc->unit_length
1269       = get_Initial_Length( &cc->is_dw64, c,
1270            "parse_CU_Header: invalid initial-length field" );
1271
1272    TRACE_D3("   Length:        %llu\n", cc->unit_length );
1273
1274    /* version */
1275    cc->version = get_UShort( c );
1276    if (cc->version != 2 && cc->version != 3 && cc->version != 4
1277        && cc->version != 5)
1278       cc->barf( "parse_CU_Header: "
1279                 "is neither DWARF2 nor DWARF3 nor DWARF4 nor DWARF5" );
1280    TRACE_D3("   Version:       %d\n", (Int)cc->version );
1281
1282    /* unit type */
1283    if (cc->version >= 5) {
1284       unit_type = get_UChar( c );
1285       address_size = get_UChar( c );
1286    } else {
1287       unit_type = type_unit ? DW_UT_type : DW_UT_compile;
1288       address_size = 0; /* Will be read later. */
1289    }
1290
1291    /* debug_abbrev_offset */
1292    debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1293    if (debug_abbrev_offset >= escn_debug_abbv.szB)
1294       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
1295    TRACE_D3("   Abbrev Offset: %llu\n", debug_abbrev_offset );
1296
1297    /* address size.  If this isn't equal to the host word size, just
1298       give up.  This makes it safe to assume elsewhere that
1299       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
1300       word. */
1301    if (cc->version < 5)
1302       address_size = get_UChar( c );
1303
1304    if (address_size != sizeof(void*))
1305       cc->barf( "parse_CU_Header: invalid address_size" );
1306    TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
1307
1308    cc->is_type_unit = type_unit;
1309    cc->is_alt_info = alt_info;
1310
1311    if (type_unit || (cc->version >= 5 && (unit_type == DW_UT_type
1312                                           || unit_type == DW_UT_split_type))) {
1313       cc->type_signature = get_ULong( c );
1314       cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1315    }
1316
1317    if (cc->version >= 5 && (unit_type == DW_UT_skeleton
1318                             || unit_type == DW_UT_split_compile)) {
1319       /* dwo_id = */ get_ULong( c );
1320    }
1321
1322    /* Set up cc->debug_abbv to point to the relevant table for this
1323       CU.  Set its .szB so that at least we can't read off the end of
1324       the debug_abbrev section -- potentially (and quite likely) too
1325       big, if this isn't the last table in the section, but at least
1326       it's safe.
1327
1328       This amounts to taking debug_abbv_escn and moving the start
1329       position along by debug_abbrev_offset bytes, hence forming a
1330       smaller DiSlice which has the same end point.  Since we checked
1331       just above that debug_abbrev_offset is less than the size of
1332       debug_abbv_escn, this should leave us with a nonempty slice. */
1333    vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
1334    cc->debug_abbv      = escn_debug_abbv;
1335    cc->debug_abbv.ioff += debug_abbrev_offset;
1336    cc->debug_abbv.szB  -= debug_abbrev_offset;
1337
1338    if (last_abbv.ht_abbvs != NULL
1339        && debug_abbrev_offset == last_abbv.debug_abbrev_offset) {
1340       cc->abbv = last_abbv;
1341    } else {
1342       if (last_abbv.ht_abbvs != NULL)
1343          VG_(HT_destruct) (last_abbv.ht_abbvs, ML_(dinfo_free));
1344       init_ht_abbvs(cc, debug_abbrev_offset, td3);
1345    }
1346 }
1347
1348 /* This represents a single signatured type.  It maps a type signature
1349    (a ULong) to a cooked DIE offset.  Objects of this type are stored
1350    in the type signature hash table.  */
1351 typedef
1352    struct D3SignatureType {
1353       struct D3SignatureType *next;
1354       UWord data;
1355       ULong type_signature;
1356       UWord die;
1357    }
1358    D3SignatureType;
1359
1360 /* Record a signatured type in the hash table.  */
1361 static void record_signatured_type ( VgHashTable *tab,
1362                                      ULong type_signature,
1363                                      UWord die )
1364 {
1365    D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
1366                                                  sizeof(D3SignatureType) );
1367    dstype->data = (UWord) type_signature;
1368    dstype->type_signature = type_signature;
1369    dstype->die = die;
1370    VG_(HT_add_node) ( tab, dstype );
1371 }
1372
1373 /* Given a type signature hash table and a type signature, return the
1374    cooked DIE offset of the type.  If the type cannot be found, call
1375    BARF.  */
1376 static UWord lookup_signatured_type ( const VgHashTable *tab,
1377                                       ULong type_signature,
1378                                       void (*barf)( const HChar* ) __attribute__((noreturn)) )
1379 {
1380    D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
1381    /* This may be unwarranted chumminess with the hash table
1382       implementation.  */
1383    while ( dstype != NULL && dstype->type_signature != type_signature)
1384       dstype = dstype->next;
1385    if (dstype == NULL) {
1386       barf("lookup_signatured_type: could not find signatured type");
1387       /*NOTREACHED*/
1388       vg_assert(0);
1389    }
1390    return dstype->die;
1391 }
1392
1393
1394 /* Represents Form data.  If szB is 1/2/4/8 then the result is in the
1395    lowest 1/2/4/8 bytes of u.val.  If szB is zero or negative then the
1396    result is an image section beginning at u.cur and with size -szB.
1397    No other szB values are allowed. */
1398 typedef
1399    struct {
1400       Long szB; // 1, 2, 4, 8 or non-positive values only.
1401       union { ULong val; DiCursor cur; } u;
1402    }
1403    FormContents;
1404
1405 // Read data for get_Form_contents() from .debug_addr for the 'index' entry.
1406 static void get_Form_contents_addr( /*OUT*/FormContents* cts, DW_FORM form,
1407                                     ULong index, const CUConst* cc, Bool td3 )
1408 {
1409    cts->u.val = get_debug_addr_entry_form( index, cc, form );
1410    cts->szB   = sizeof(UWord);
1411    TRACE_D3("0x%lx", (UWord)cts->u.val);
1412 }
1413
1414 // Read data for get_Form_contents() from .debug_str for the given offset.
1415 static void get_Form_contents_str( /*OUT*/FormContents* cts, DW_FORM form,
1416                                     UWord offset, const CUConst* cc, Bool td3 )
1417 {
1418    if (!ML_(sli_is_valid)(cc->escn_debug_str)
1419        || offset >= cc->escn_debug_str.szB) {
1420       VG_(printf)(
1421          "get_Form_contents_str: %u (%s) points outside .debug_str\n",
1422          form, ML_(pp_DW_FORM)(form));
1423       cc->barf("get_Form_contents_str: index points outside .debug_str");
1424    }
1425    /* FIXME: check the entire string lies inside debug_str,
1426       not just the first byte of it. */
1427    DiCursor str
1428       = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), offset );
1429    if (TD3) {
1430       HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
1431       TRACE_D3("(indirect string, offset: 0x%lx): %s", offset, tmp);
1432       ML_(dinfo_free)(tmp);
1433    }
1434    cts->u.cur = str;
1435    cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1436 }
1437
1438 static inline UInt sizeof_Dwarfish_UWord (Bool is_dw64)
1439 {
1440    if (is_dw64)
1441       return sizeof(ULong);
1442    else
1443       return sizeof(UInt);
1444 }
1445
1446 // Read data for get_Form_contents() from .debug_str_offsets for the 'index' entry.
1447 static void get_Form_contents_str_offsets( /*OUT*/FormContents* cts, DW_FORM form,
1448                                     ULong index, const CUConst* cc, Bool td3 )
1449 {
1450    if(!cc->cu_has_str_offsets_base) {
1451       VG_(printf)(
1452          "get_Form_contents_str_offsets: %u (%s) without DW_AT_str_offsets_base\n",
1453          form, ML_(pp_DW_FORM)(form));
1454       cc->barf("get_Form_contents_str_offsets: DW_AT_str_offsets_base not set");
1455    }
1456    UWord str_offsets_pos = cc->cu_str_offsets_base
1457                            + index * sizeof_Dwarfish_UWord (cc->is_dw64);
1458    Cursor cur;
1459    init_Cursor( &cur, cc->escn_debug_str_offsets, str_offsets_pos, cc->barf,
1460                 "get_Form_contents_str_offsets: index "
1461                 "points outside .debug_str_offsets" );
1462    if (TD3) {
1463       HChar* tmp = ML_(cur_read_strdup)(get_DiCursor_from_Cursor(&cur), "di.getFC.1");
1464       TRACE_D3("(indirect string offset, offset: 0x%lx): %s", str_offsets_pos, tmp);
1465       ML_(dinfo_free)(tmp);
1466    }
1467    get_Form_contents_str( cts, form, get_Dwarfish_UWord(&cur, cc->is_dw64), cc, td3 );
1468 }
1469
1470 /* From 'c', get the Form data into 'cts'.  Either it gets a 1/2/4/8
1471    byte scalar value, or (a reference to) zero or more bytes starting
1472    at a DiCursor.*/
1473 static
1474 void get_Form_contents ( /*OUT*/FormContents* cts,
1475                          const CUConst* cc, Cursor* c,
1476                          Bool td3, const name_form *abbv )
1477 {
1478    DW_FORM form = abbv->at_form;
1479    VG_(bzero_inline)(cts, sizeof(*cts));
1480    // !!! keep switch in sync with get_Form_szB. The nr of characters read below
1481    // must be computed similarly in get_Form_szB.
1482    // The consistency is verified in trace_DIE.
1483    switch (form) {
1484       case DW_FORM_data1:
1485          cts->u.val = (ULong)(UChar)get_UChar(c);
1486          cts->szB   = 1;
1487          TRACE_D3("%u", (UInt)cts->u.val);
1488          break;
1489       case DW_FORM_data2:
1490          cts->u.val = (ULong)(UShort)get_UShort(c);
1491          cts->szB   = 2;
1492          TRACE_D3("%u", (UInt)cts->u.val);
1493          break;
1494       case DW_FORM_data4:
1495          cts->u.val = (ULong)(UInt)get_UInt(c);
1496          cts->szB   = 4;
1497          TRACE_D3("%u", (UInt)cts->u.val);
1498          break;
1499       case DW_FORM_data8:
1500          cts->u.val = get_ULong(c);
1501          cts->szB   = 8;
1502          TRACE_D3("%llu", cts->u.val);
1503          break;
1504      case DW_FORM_data16: {
1505          /* This is more like a block than an integral value.  */
1506          ULong    u64b;
1507          DiCursor data16 = get_DiCursor_from_Cursor(c);
1508          TRACE_D3("data16: ");
1509          for (u64b = 16; u64b > 0; u64b--) {
1510             UChar u8 = get_UChar(c);
1511             TRACE_D3("%x ", (UInt)u8);
1512          }
1513          cts->u.cur = data16;
1514          cts->szB   = - (Long)16;
1515          break;
1516          }
1517       case DW_FORM_sec_offset:
1518          cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
1519          cts->szB   = cc->is_dw64 ? 8 : 4;
1520          TRACE_D3("%llu", cts->u.val);
1521          break;
1522       case DW_FORM_rnglistx: {
1523          if(!cc->cu_has_rnglists_base) {
1524             cc->barf("get_Form_contents: DW_FORM_rnglistsx"
1525                      " without DW_AT_rnglists_base");
1526          }
1527          /* Convert index to offset pointing to the offsets list. */
1528          ULong index = get_ULEB128(c);
1529          ULong offset_to_offset = cc->cu_rnglists_base + index * sizeof_Dwarfish_UWord( cc->is_dw64 );
1530          /* And read the offset value from there. */
1531          Cursor cur;
1532          init_Cursor( &cur, cc->escn_debug_rnglists, offset_to_offset, cc->barf,
1533                       "get_Form_contents: index points outside .debug_rnglists" );
1534          cts->u.val = cc->cu_rnglists_base + get_Dwarfish_UWord(&cur, cc->is_dw64);
1535          cts->szB   = 8;
1536          TRACE_D3("%llu", cts->u.val);
1537          break;
1538       }
1539       case DW_FORM_loclistx: {
1540          if(!cc->cu_has_loclists_base) {
1541             cc->barf("get_Form_contents: DW_FORM_loclistsx"
1542                      " without DW_AT_loclists_base");
1543          }
1544          /* Convert index to offset pointing to the offsets list. */
1545          ULong index = get_ULEB128(c);
1546          ULong offset_to_offset = cc->cu_loclists_base + index * sizeof_Dwarfish_UWord( cc->is_dw64 );
1547          /* And read the offset value from there. */
1548          Cursor cur;
1549          init_Cursor( &cur, cc->escn_debug_loclists, offset_to_offset, cc->barf,
1550                       "get_Form_contents: index points outside .debug_loclists" );
1551          cts->u.val = cc->cu_loclists_base + get_Dwarfish_UWord(&cur, cc->is_dw64);
1552          cts->szB   = 8;
1553          TRACE_D3("%llu", cts->u.val);
1554          break;
1555       }
1556       case DW_FORM_sdata:
1557          cts->u.val = (ULong)(Long)get_SLEB128(c);
1558          cts->szB   = 8;
1559          TRACE_D3("%llu", cts->u.val);
1560          break;
1561       case DW_FORM_udata:
1562          cts->u.val = (ULong)(Long)get_ULEB128(c);
1563          cts->szB   = 8;
1564          TRACE_D3("%llu", cts->u.val);
1565          break;
1566       case DW_FORM_addr:
1567          /* note, this is a hack.  DW_FORM_addr is defined as getting
1568             a word the size of the target machine as defined by the
1569             address_size field in the CU Header.  However,
1570             parse_CU_Header() rejects all inputs except those for
1571             which address_size == sizeof(Word), hence we can just
1572             treat it as a (host) Word.  */
1573          cts->u.val = (ULong)(UWord)get_UWord(c);
1574          cts->szB   = sizeof(UWord);
1575          TRACE_D3("0x%lx", (UWord)cts->u.val);
1576          break;
1577
1578       case DW_FORM_ref_addr:
1579          /* We make the same word-size assumption as DW_FORM_addr. */
1580          /* What does this really mean?  From D3 Sec 7.5.4,
1581             description of "reference", it would appear to reference
1582             some other DIE, by specifying the offset from the
1583             beginning of a .debug_info section.  The D3 spec mentions
1584             that this might be in some other shared object and
1585             executable.  But I don't see how the name of the other
1586             object/exe is specified.
1587
1588             At least for the DW_FORM_ref_addrs created by icc11, the
1589             references seem to be within the same object/executable.
1590             So for the moment we merely range-check, to see that they
1591             actually do specify a plausible offset within this
1592             object's .debug_info, and return the value unchanged.
1593
1594             In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1595             DWARF 3 and later, it is offset-sized.
1596          */
1597          if (cc->version == 2) {
1598             cts->u.val = (ULong)(UWord)get_UWord(c);
1599             cts->szB   = sizeof(UWord);
1600          } else {
1601             cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1602             cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1603          }
1604          TRACE_D3("0x%lx", (UWord)cts->u.val);
1605          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
1606          if (/* the following is surely impossible, but ... */
1607              !ML_(sli_is_valid)(cc->escn_debug_info)
1608              || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
1609             /* Hmm.  Offset is nonsensical for this object's .debug_info
1610                section.  Be safe and reject it. */
1611             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1612                      "outside .debug_info");
1613          }
1614          break;
1615
1616       case DW_FORM_strp: {
1617          /* this is an offset into .debug_str */
1618          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1619          get_Form_contents_str( cts, form, uw, cc, td3 );
1620          break;
1621       }
1622       case DW_FORM_line_strp: {
1623          /* this is an offset into .debug_line_str */
1624          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1625          if (!ML_(sli_is_valid)(cc->escn_debug_line_str)
1626              || uw >= cc->escn_debug_line_str.szB)
1627             cc->barf("get_Form_contents: DW_FORM_line_strp "
1628                      "points outside .debug_line_str");
1629          /* FIXME: check the entire string lies inside debug_line_str,
1630             not just the first byte of it. */
1631          DiCursor line_str
1632             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_line_str), uw );
1633          if (TD3) {
1634             HChar* tmp = ML_(cur_read_strdup)(line_str, "di.getFC.1.5");
1635             TRACE_D3("(indirect line string, offset: 0x%lx): %s", uw, tmp);
1636             ML_(dinfo_free)(tmp);
1637          }
1638          cts->u.cur = line_str;
1639          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(line_str));
1640          break;
1641       }
1642       case DW_FORM_string: {
1643          DiCursor str = get_AsciiZ(c);
1644          if (TD3) {
1645             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
1646             TRACE_D3("%s", tmp);
1647             ML_(dinfo_free)(tmp);
1648          }
1649          cts->u.cur = str;
1650          /* strlen is safe because get_AsciiZ already 'vetted' the
1651             entire string */
1652          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1653          break;
1654       }
1655       case DW_FORM_ref1: {
1656          UChar u8   = get_UChar(c);
1657          UWord res  = cc->cu_start_offset + (UWord)u8;
1658          cts->u.val = (ULong)res;
1659          cts->szB   = sizeof(UWord);
1660          TRACE_D3("<%lx>", res);
1661          break;
1662       }
1663       case DW_FORM_ref2: {
1664          UShort u16 = get_UShort(c);
1665          UWord  res = cc->cu_start_offset + (UWord)u16;
1666          cts->u.val = (ULong)res;
1667          cts->szB   = sizeof(UWord);
1668          TRACE_D3("<%lx>", res);
1669          break;
1670       }
1671       case DW_FORM_ref4: {
1672          UInt  u32  = get_UInt(c);
1673          UWord res  = cc->cu_start_offset + (UWord)u32;
1674          cts->u.val = (ULong)res;
1675          cts->szB   = sizeof(UWord);
1676          TRACE_D3("<%lx>", res);
1677          break;
1678       }
1679       case DW_FORM_ref8: {
1680          ULong u64  = get_ULong(c);
1681          UWord res  = cc->cu_start_offset + (UWord)u64;
1682          cts->u.val = (ULong)res;
1683          cts->szB   = sizeof(UWord);
1684          TRACE_D3("<%lx>", res);
1685          break;
1686       }
1687       case DW_FORM_ref_udata: {
1688          ULong u64  = get_ULEB128(c);
1689          UWord res  = cc->cu_start_offset + (UWord)u64;
1690          cts->u.val = (ULong)res;
1691          cts->szB   = sizeof(UWord);
1692          TRACE_D3("<%lx>", res);
1693          break;
1694       }
1695       case DW_FORM_flag: {
1696          UChar u8 = get_UChar(c);
1697          TRACE_D3("%u", (UInt)u8);
1698          cts->u.val = (ULong)u8;
1699          cts->szB   = 1;
1700          break;
1701       }
1702       case DW_FORM_flag_present:
1703          TRACE_D3("1");
1704          cts->u.val = 1;
1705          cts->szB   = 1;
1706          break;
1707       case DW_FORM_implicit_const:
1708          cts->u.val = (ULong)abbv->at_val;
1709          cts->szB   = 8;
1710          TRACE_D3("%llu", cts->u.val);
1711          break;
1712       case DW_FORM_block1: {
1713          ULong    u64b;
1714          ULong    u64   = (ULong)get_UChar(c);
1715          DiCursor block = get_DiCursor_from_Cursor(c);
1716          TRACE_D3("%llu byte block: ", u64);
1717          for (u64b = u64; u64b > 0; u64b--) {
1718             UChar u8 = get_UChar(c);
1719             TRACE_D3("%x ", (UInt)u8);
1720          }
1721          cts->u.cur = block;
1722          cts->szB   = - (Long)u64;
1723          break;
1724       }
1725       case DW_FORM_block2: {
1726          ULong    u64b;
1727          ULong    u64   = (ULong)get_UShort(c);
1728          DiCursor block = get_DiCursor_from_Cursor(c);
1729          TRACE_D3("%llu byte block: ", u64);
1730          for (u64b = u64; u64b > 0; u64b--) {
1731             UChar u8 = get_UChar(c);
1732             TRACE_D3("%x ", (UInt)u8);
1733          }
1734          cts->u.cur = block;
1735          cts->szB   = - (Long)u64;
1736          break;
1737       }
1738       case DW_FORM_block4: {
1739          ULong    u64b;
1740          ULong    u64   = (ULong)get_UInt(c);
1741          DiCursor block = get_DiCursor_from_Cursor(c);
1742          TRACE_D3("%llu byte block: ", u64);
1743          for (u64b = u64; u64b > 0; u64b--) {
1744             UChar u8 = get_UChar(c);
1745             TRACE_D3("%x ", (UInt)u8);
1746          }
1747          cts->u.cur = block;
1748          cts->szB   = - (Long)u64;
1749          break;
1750       }
1751       case DW_FORM_exprloc:
1752       case DW_FORM_block: {
1753          ULong    u64b;
1754          ULong    u64   = (ULong)get_ULEB128(c);
1755          DiCursor block = get_DiCursor_from_Cursor(c);
1756          TRACE_D3("%llu byte block: ", u64);
1757          for (u64b = u64; u64b > 0; u64b--) {
1758             UChar u8 = get_UChar(c);
1759             TRACE_D3("%x ", (UInt)u8);
1760          }
1761          cts->u.cur = block;
1762          cts->szB   = - (Long)u64;
1763          break;
1764       }
1765       case DW_FORM_ref_sig8: {
1766          ULong  u64b;
1767          ULong  signature = get_ULong (c);
1768          ULong  work = signature;
1769          TRACE_D3("8 byte signature: ");
1770          for (u64b = 8; u64b > 0; u64b--) {
1771             UChar u8 = work & 0xff;
1772             TRACE_D3("%x ", (UInt)u8);
1773             work >>= 8;
1774          }
1775
1776          /* cc->signature_types is only built/initialised when
1777             VG_(clo_read_var_info) is set. In this case,
1778             the DW_FORM_ref_sig8 can be looked up.
1779             But we can also arrive here when only reading inline info
1780             and VG_(clo_trace_symtab) is set. In such a case,
1781             we cannot lookup the DW_FORM_ref_sig8, we rather assign
1782             a dummy value. This is a kludge, but otherwise,
1783             the 'dwarf inline info reader' tracing would have to
1784             do type processing/reading. It is better to avoid
1785             adding significant 'real' processing only due to tracing. */
1786          if (VG_(clo_read_var_info)) {
1787             /* Due to the way that the hash table is constructed, the
1788                resulting DIE offset here is already "cooked".  See
1789                cook_die_using_form.  */
1790             cts->u.val = lookup_signatured_type (cc->signature_types, signature,
1791                                                  c->barf);
1792          } else {
1793             vg_assert (td3);
1794             vg_assert (VG_(clo_read_inline_info));
1795             TRACE_D3("<not dereferencing signature type>");
1796             cts->u.val = 0; /* Assign a dummy/rubbish value */
1797          }
1798          cts->szB   = sizeof(UWord);
1799          break;
1800       }
1801       case DW_FORM_indirect: {
1802          /* Urgh, this is ugly and somewhat unclear how it works
1803             with DW_FORM_implicit_const. HACK.  */
1804          name_form nfi = *abbv;
1805          nfi.at_form = (DW_FORM)get_ULEB128(c);
1806          get_Form_contents (cts, cc, c, td3, &nfi);
1807          return;
1808       }
1809
1810       case DW_FORM_GNU_ref_alt:
1811          cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1812          cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1813          TRACE_D3("0x%lx", (UWord)cts->u.val);
1814          if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
1815          if (/* the following is surely impossible, but ... */
1816              !ML_(sli_is_valid)(cc->escn_debug_info_alt))
1817             cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
1818                      "but no alternate .debug_info");
1819          else if (cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
1820             /* Hmm.  Offset is nonsensical for this object's .debug_info
1821                section.  Be safe and reject it. */
1822             cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
1823                      "outside alternate .debug_info");
1824          }
1825          break;
1826
1827       case DW_FORM_GNU_strp_alt: {
1828          /* this is an offset into alternate .debug_str */
1829          SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1830          if (!ML_(sli_is_valid)(cc->escn_debug_str_alt))
1831             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
1832                      "but no alternate .debug_str");
1833          else if (uw >= cc->escn_debug_str_alt.szB)
1834             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1835                      "points outside alternate .debug_str");
1836          /* FIXME: check the entire string lies inside debug_str,
1837             not just the first byte of it. */
1838          DiCursor str
1839             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
1840          if (TD3) {
1841             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
1842             TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
1843             ML_(dinfo_free)(tmp);
1844          }
1845          cts->u.cur = str;
1846          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1847          break;
1848       }
1849
1850       case DW_FORM_addrx: {
1851          /* this is an offset into .debug_addr */
1852          ULong index = (ULong)(Long)get_ULEB128(c);
1853          get_Form_contents_addr(cts, form, index, cc, td3);
1854          break;
1855       }
1856       case DW_FORM_addrx1: {
1857          /* this is an offset into .debug_addr */
1858          ULong index = (ULong)get_UChar(c);
1859          get_Form_contents_addr(cts, form, index, cc, td3);
1860          break;
1861       }
1862       case DW_FORM_addrx2: {
1863          /* this is an offset into .debug_addr */
1864          ULong index = (ULong)get_UShort(c);
1865          get_Form_contents_addr(cts, form, index, cc, td3);
1866          break;
1867       }
1868       case DW_FORM_addrx3: {
1869          /* this is an offset into .debug_addr */
1870          ULong index = (ULong)get_UInt3(c);
1871          get_Form_contents_addr(cts, form, index, cc, td3);
1872          break;
1873       }
1874       case DW_FORM_addrx4: {
1875          /* this is an offset into .debug_addr */
1876          ULong index = (ULong)get_UInt(c);
1877          get_Form_contents_addr(cts, form, index, cc, td3);
1878          break;
1879       }
1880       case DW_FORM_strx: {
1881          /* this is an offset into .debug_str_offsets */
1882          ULong index = (ULong)(Long)get_ULEB128(c);
1883          get_Form_contents_str_offsets(cts, form, index, cc, td3);
1884          break;
1885       }
1886       case DW_FORM_strx1: {
1887          /* this is an offset into .debug_str_offsets */
1888          ULong index = get_UChar(c);
1889          get_Form_contents_str_offsets(cts, form, index, cc, td3);
1890          break;
1891       }
1892       case DW_FORM_strx2: {
1893          /* this is an offset into .debug_str_offsets */
1894          ULong index = (ULong)get_UShort(c);
1895          get_Form_contents_str_offsets(cts, form, index, cc, td3);
1896          break;
1897       }
1898       case DW_FORM_strx3: {
1899          /* this is an offset into .debug_str_offsets */
1900          ULong index = (ULong)get_UInt3(c);
1901          get_Form_contents_str_offsets(cts, form, index, cc, td3);
1902          break;
1903       }
1904       case DW_FORM_strx4: {
1905          /* this is an offset into .debug_str_offsets */
1906          ULong index = (ULong)get_UInt(c);
1907          get_Form_contents_str_offsets(cts, form, index, cc, td3);
1908          break;
1909       }
1910
1911       default:
1912          VG_(printf)(
1913             "get_Form_contents: unhandled %u (%s) at <%llx>\n",
1914             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1915          c->barf("get_Form_contents: unhandled DW_FORM");
1916    }
1917 }
1918
1919 #define VARSZ_FORM 0xffffffff
1920 /* If the form is a fixed length form, return the nr of bytes for this form.
1921    If the form is a variable length form, return VARSZ_FORM. */
1922 static
1923 UInt get_Form_szB (const CUConst* cc, DW_FORM form )
1924 {
1925    // !!! keep switch in sync with get_Form_contents : the nr of bytes
1926    // read from a cursor by get_Form_contents must be returned by
1927    // the below switch.
1928    // The consistency is verified in trace_DIE.
1929    switch (form) {
1930       case DW_FORM_data1: return 1;
1931       case DW_FORM_data2: return 2;
1932       case DW_FORM_data4: return 4;
1933       case DW_FORM_data8: return 8;
1934       case DW_FORM_data16: return 16;
1935       case DW_FORM_sec_offset:
1936          if (cc->is_dw64)
1937             return 8;
1938          else
1939             return 4;
1940       case DW_FORM_rnglistx:
1941       case DW_FORM_loclistx:
1942          return VARSZ_FORM;
1943       case DW_FORM_sdata:
1944          return VARSZ_FORM;
1945       case DW_FORM_udata:
1946          return VARSZ_FORM;
1947       case DW_FORM_addr: // See hack in get_Form_contents
1948          return sizeof(UWord);
1949       case DW_FORM_ref_addr: // See hack in get_Form_contents
1950          if (cc->version == 2)
1951             return sizeof(UWord);
1952          else
1953             return sizeof_Dwarfish_UWord (cc->is_dw64);
1954       case DW_FORM_strp:
1955       case DW_FORM_line_strp:
1956          return sizeof_Dwarfish_UWord (cc->is_dw64);
1957       case DW_FORM_string:
1958          return VARSZ_FORM;
1959       case DW_FORM_ref1:
1960          return 1;
1961       case DW_FORM_ref2:
1962          return 2;
1963       case DW_FORM_ref4:
1964          return 4;
1965       case DW_FORM_ref8:
1966          return 8;
1967       case DW_FORM_ref_udata:
1968          return VARSZ_FORM;
1969       case DW_FORM_flag:
1970          return 1;
1971       case DW_FORM_flag_present:
1972          return 0; // !!! special case, no data.
1973       case DW_FORM_block1:
1974          return VARSZ_FORM;
1975       case DW_FORM_block2:
1976          return VARSZ_FORM;
1977       case DW_FORM_block4:
1978          return VARSZ_FORM;
1979       case DW_FORM_exprloc:
1980       case DW_FORM_block:
1981          return VARSZ_FORM;
1982       case DW_FORM_ref_sig8:
1983          return 8;
1984       case DW_FORM_indirect:
1985          return VARSZ_FORM;
1986       case DW_FORM_GNU_ref_alt:
1987          return sizeof_Dwarfish_UWord(cc->is_dw64);
1988       case DW_FORM_GNU_strp_alt:
1989          return sizeof_Dwarfish_UWord(cc->is_dw64);
1990       case DW_FORM_implicit_const:
1991          return 0; /* Value inside abbrev. */
1992       case DW_FORM_addrx:
1993          return VARSZ_FORM;
1994       case DW_FORM_strx:
1995          return VARSZ_FORM;
1996       case DW_FORM_addrx1:
1997       case DW_FORM_strx1:
1998          return 1;
1999       case DW_FORM_addrx2:
2000       case DW_FORM_strx2:
2001          return 2;
2002       case DW_FORM_addrx3:
2003       case DW_FORM_strx3:
2004          return 3;
2005       case DW_FORM_addrx4:
2006       case DW_FORM_strx4:
2007          return 4;
2008       default:
2009          VG_(printf)(
2010             "get_Form_szB: unhandled %u (%s)\n",
2011             form, ML_(pp_DW_FORM)(form));
2012          cc->barf("get_Form_contents: unhandled DW_FORM");
2013    }
2014 }
2015
2016 /* Skip a DIE as described by abbv.
2017    If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
2018 static
2019 void skip_DIE (UWord  *sibling,
2020                Cursor* c_die,
2021                const g_abbv *abbv,
2022                const CUConst* cc)
2023 {
2024    UInt nf_i;
2025    FormContents cts;
2026    nf_i = 0;
2027    while (True) {
2028       if (abbv->nf[nf_i].at_name == DW_AT_sibling) {
2029          get_Form_contents( &cts, cc, c_die, False /*td3*/,
2030                             &abbv->nf[nf_i] );
2031          if ( cts.szB > 0 )
2032             *sibling = cts.u.val;
2033          nf_i++;
2034       } else if (abbv->nf[nf_i].skip_szB == VARSZ_FORM) {
2035          DW_FORM form = abbv->nf[nf_i].at_form;
2036          if(form == DW_FORM_addrx || form == DW_FORM_strx
2037             || form == DW_FORM_rnglistx || form == DW_FORM_loclistx) {
2038             /* Skip without interpreting them, they may depend on e.g.
2039                DW_AT_addr_base that has not been read yet. */
2040             (void) get_ULEB128(c_die);
2041          } else
2042             get_Form_contents( &cts, cc, c_die, False /*td3*/,
2043                                &abbv->nf[nf_i] );
2044          nf_i++;
2045       } else {
2046          advance_position_of_Cursor (c_die, (ULong)abbv->nf[nf_i].skip_szB);
2047          nf_i = abbv->nf[nf_i].next_nf;
2048       }
2049       if (nf_i == 0)
2050          break;
2051    }
2052 }
2053
2054
2055 /*------------------------------------------------------------*/
2056 /*---                                                      ---*/
2057 /*--- Parsing of variable-related DIEs                     ---*/
2058 /*---                                                      ---*/
2059 /*------------------------------------------------------------*/
2060
2061 typedef
2062    struct _TempVar {
2063       const HChar*  name; /* in DebugInfo's .strpool */
2064       /* Represent ranges economically.  nRanges is the number of
2065          ranges.  Cases:
2066          0: .rngOneMin .rngOneMax .manyRanges are all zero
2067          1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
2068          2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
2069          This is merely an optimisation to avoid having to allocate
2070          and free the XArray in the common (98%) of cases where there
2071          is zero or one address ranges. */
2072       UWord   nRanges;
2073       Addr    rngOneMin;
2074       Addr    rngOneMax;
2075       XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
2076       /* Do not free .rngMany, since many TempVars will have the same
2077          value.  Instead the associated storage is to be freed by
2078          deleting 'rangetree', which stores a single copy of each
2079          range. */
2080       /* --- */
2081       Int     level;
2082       UWord   typeR; /* a cuOff */
2083       GExpr*  gexpr; /* for this variable */
2084       GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
2085                         any */
2086       UInt    fndn_ix; /* declaring file/dirname index in fndnpool, or 0 */
2087       Int     fLine; /* declaring file line number, or zero */
2088       /* offset in .debug_info, so that abstract instances can be
2089          found to satisfy references from concrete instances. */
2090       UWord   dioff;
2091       UWord   absOri; /* so the absOri fields refer to dioff fields
2092                          in some other, related TempVar. */
2093    }
2094    TempVar;
2095
2096 typedef
2097    struct {
2098       /* Contains the range stack: a stack of address ranges, one
2099          stack entry for each nested scope.
2100
2101          Some scope entries are created by function definitions
2102          (DW_AT_subprogram), and for those, we also note the GExpr
2103          derived from its DW_AT_frame_base attribute, if any.
2104          Consequently it should be possible to find, for any
2105          variable's DIE, the GExpr for the containing function's
2106          DW_AT_frame_base by scanning back through the stack to find
2107          the nearest entry associated with a function.  This somewhat
2108          elaborate scheme is provided so as to make it possible to
2109          obtain the correct DW_AT_frame_base expression even in the
2110          presence of nested functions (or to be more precise, in the
2111          presence of nested DW_AT_subprogram DIEs).
2112       */
2113       Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
2114                      stack */
2115       Int     stack_size;
2116       XArray **ranges; /* XArray of AddrRange */
2117       Int     *level;  /* D3 DIE levels */
2118       Bool    *isFunc; /* from DW_AT_subprogram? */
2119       GExpr  **fbGX;   /* if isFunc, contains the FB expr, else NULL */
2120    }
2121    D3VarParser;
2122
2123 /* Completely initialise a variable parser object */
2124 static void
2125 var_parser_init ( D3VarParser *parser )
2126 {
2127    parser->sp = -1;
2128    parser->stack_size = 0;
2129    parser->ranges = NULL;
2130    parser->level  = NULL;
2131    parser->isFunc = NULL;
2132    parser->fbGX = NULL;
2133 }
2134
2135 /* Release any memory hanging off a variable parser object */
2136 static void
2137 var_parser_release ( D3VarParser *parser )
2138 {
2139    ML_(dinfo_free)( parser->ranges );
2140    ML_(dinfo_free)( parser->level );
2141    ML_(dinfo_free)( parser->isFunc );
2142    ML_(dinfo_free)( parser->fbGX );
2143 }
2144
2145 static void varstack_show ( const D3VarParser* parser, const HChar* str )
2146 {
2147    Word i, j;
2148    VG_(printf)("  varstack (%s) {\n", str);
2149    for (i = 0; i <= parser->sp; i++) {
2150       XArray* xa = parser->ranges[i];
2151       vg_assert(xa);
2152       VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
2153       if (parser->isFunc[i]) {
2154          VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
2155       } else {
2156          vg_assert(parser->fbGX[i] == NULL);
2157       }
2158       VG_(printf)(": ");
2159       if (VG_(sizeXA)( xa ) == 0) {
2160          VG_(printf)("** empty PC range array **");
2161       } else {
2162          for (j = 0; j < VG_(sizeXA)( xa ); j++) {
2163             AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
2164             vg_assert(range);
2165             VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
2166          }
2167       }
2168       VG_(printf)("\n");
2169    }
2170    VG_(printf)("  }\n");
2171 }
2172
2173 /* Remove from the stack, all entries with .level > 'level' */
2174 static
2175 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
2176 {
2177    Bool changed = False;
2178    vg_assert(parser->sp < parser->stack_size);
2179    while (True) {
2180       vg_assert(parser->sp >= -1);
2181       if (parser->sp == -1) break;
2182       if (parser->level[parser->sp] <= level) break;
2183       if (0)
2184          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
2185       vg_assert(parser->ranges[parser->sp]);
2186       /* Who allocated this xa?  get_range_list() or
2187          unitary_range_list(). */
2188       VG_(deleteXA)( parser->ranges[parser->sp] );
2189       parser->sp--;
2190       changed = True;
2191    }
2192    if (changed && td3)
2193       varstack_show( parser, "after preen" );
2194 }
2195
2196 static void varstack_push ( const CUConst* cc,
2197                             D3VarParser* parser,
2198                             Bool td3,
2199                             XArray* ranges, Int level,
2200                             Bool    isFunc, GExpr* fbGX ) {
2201    if (0)
2202    TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
2203             parser->sp+1, level, ranges);
2204
2205    /* First we need to zap everything >= 'level', as we are about to
2206       replace any previous entry at 'level', so .. */
2207    varstack_preen(parser, /*td3*/False, level-1);
2208
2209    vg_assert(parser->sp >= -1);
2210    vg_assert(parser->sp < parser->stack_size);
2211    if (parser->sp == parser->stack_size - 1) {
2212       parser->stack_size += 48;
2213       parser->ranges =
2214          ML_(dinfo_realloc)("di.readdwarf3.varpush.1", parser->ranges,
2215                             parser->stack_size * sizeof parser->ranges[0]);
2216       parser->level =
2217          ML_(dinfo_realloc)("di.readdwarf3.varpush.2", parser->level,
2218                             parser->stack_size * sizeof parser->level[0]);
2219       parser->isFunc =
2220          ML_(dinfo_realloc)("di.readdwarf3.varpush.3", parser->isFunc,
2221                             parser->stack_size * sizeof parser->isFunc[0]);
2222       parser->fbGX =
2223          ML_(dinfo_realloc)("di.readdwarf3.varpush.4", parser->fbGX,
2224                             parser->stack_size * sizeof parser->fbGX[0]);
2225    }
2226    if (parser->sp >= 0)
2227       vg_assert(parser->level[parser->sp] < level);
2228    parser->sp++;
2229    vg_assert(ranges != NULL);
2230    if (!isFunc) vg_assert(fbGX == NULL);
2231    parser->ranges[parser->sp] = ranges;
2232    parser->level[parser->sp]  = level;
2233    parser->isFunc[parser->sp] = isFunc;
2234    parser->fbGX[parser->sp]   = fbGX;
2235    if (TD3)
2236       varstack_show( parser, "after push" );
2237 }
2238
2239
2240 /* cts is derived from a DW_AT_location and so refers either to a
2241    location expression or to a location list.  Figure out which, and
2242    in both cases bundle the expression or location list into a
2243    so-called GExpr (guarded expression). */
2244 __attribute__((noinline))
2245 static GExpr* get_GX ( const CUConst* cc, Bool td3, const FormContents* cts )
2246 {
2247    GExpr* gexpr = NULL;
2248    if (cts->szB < 0) {
2249       /* represents a non-empty in-line location expression, and
2250          cts->u.cur points at the image bytes */
2251       gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
2252    }
2253    else
2254    if (cts->szB > 0) {
2255       /* represents a location list.  cts->u.val is the offset of it
2256          in .debug_loc. */
2257       if (!cc->cu_svma_known)
2258          cc->barf("get_GX: location list, but CU svma is unknown");
2259       gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
2260    }
2261    else {
2262       vg_assert(0); /* else caller is bogus */
2263    }
2264    return gexpr;
2265 }
2266
2267 static
2268 HChar * get_line_str (struct _DebugInfo* di, Bool is_dw64,
2269                       Cursor *data, const UInt form,
2270                       DiSlice debugstr_img, DiSlice debuglinestr_img)
2271 {
2272    HChar *str = NULL;
2273    switch (form) {
2274    case DW_FORM_string: {
2275       DiCursor distr = get_AsciiZ(data);
2276       str = ML_(cur_step_strdup)(&distr, "di.gls.string");
2277       break;
2278    }
2279    case DW_FORM_strp: {
2280       UWord uw = (UWord)get_Dwarfish_UWord( data, is_dw64 );
2281       DiCursor distr
2282          = ML_(cur_plus)( ML_(cur_from_sli)(debugstr_img), uw );
2283       str = ML_(cur_read_strdup)(distr, "di.gls.strp");
2284       break;
2285    }
2286    case DW_FORM_line_strp: {
2287       UWord uw = (UWord)get_Dwarfish_UWord( data, is_dw64 );
2288       DiCursor distr
2289          = ML_(cur_plus)( ML_(cur_from_sli)(debuglinestr_img), uw );
2290       str = ML_(cur_read_strdup)(distr, "di.gls.line_strp");
2291       break;
2292    }
2293    default:
2294       ML_(symerr)(di, True,
2295                   "Unknown path string FORM in .debug_line");
2296       break;
2297    }
2298    return str;
2299 }
2300
2301 static
2302 Int get_line_ndx (struct _DebugInfo* di,
2303                   Cursor *data, const UInt form)
2304 {
2305    Int res = 0;
2306    switch (form) {
2307    case DW_FORM_data1:
2308       res = get_UChar(data);
2309       break;
2310    case DW_FORM_data2:
2311       res = get_UShort(data);
2312       break;
2313    case DW_FORM_udata:
2314       res = get_ULEB128(data);
2315       break;
2316    default:
2317       ML_(symerr)(di, True,
2318                   "Unknown directory_index value FORM in .debug_line");
2319       break;
2320    }
2321    return res;
2322 }
2323
2324 static
2325 void skip_line_form (struct _DebugInfo* di, Bool is_dw64,
2326                          Cursor *d, const UInt form)
2327 {
2328    switch (form) {
2329    case DW_FORM_block: {
2330       ULong len = get_ULEB128(d);
2331       advance_position_of_Cursor (d, len);
2332       break;
2333    }
2334    case DW_FORM_block1: {
2335       UChar len = get_UChar(d);
2336       advance_position_of_Cursor (d, len);
2337       break;
2338    }
2339    case DW_FORM_block2: {
2340       UShort len = get_UShort(d);
2341       advance_position_of_Cursor (d, len);
2342       break;
2343    }
2344    case DW_FORM_block4: {
2345       UInt len = get_UInt(d);
2346       advance_position_of_Cursor (d, len);
2347       break;
2348    }
2349    case DW_FORM_flag:
2350    case DW_FORM_data1:
2351       advance_position_of_Cursor (d, 1);
2352       break;
2353    case DW_FORM_data2:
2354       advance_position_of_Cursor (d, 2);
2355       break;
2356    case DW_FORM_data4:
2357       advance_position_of_Cursor (d, 4);
2358       break;
2359    case DW_FORM_data8:
2360       advance_position_of_Cursor (d, 8);
2361       break;
2362    case DW_FORM_data16:
2363       advance_position_of_Cursor (d, 16);
2364       break;
2365    case DW_FORM_string:
2366       (void)get_AsciiZ (d);
2367       break;
2368    case DW_FORM_strp:
2369    case DW_FORM_line_strp:
2370    case DW_FORM_sec_offset:
2371       advance_position_of_Cursor (d, is_dw64 ? 8 : 4);
2372       break;
2373    case DW_FORM_udata:
2374       (void)get_ULEB128(d);
2375       break;
2376    case DW_FORM_sdata:
2377       (void)get_SLEB128(d);
2378       break;
2379    default:
2380       ML_(symerr)(di, True, "Unknown FORM in .debug_line");
2381       break;
2382    }
2383 }
2384
2385 /* Returns an xarray* of directory names (indexed by the dwarf dirname
2386    integer).
2387    If 'compdir' is NULL, entry [0] will be set to "."
2388    otherwise entry [0] is set to compdir.
2389    Entry [0] basically means "the current directory of the compilation",
2390    whatever that means, according to the DWARF3 spec.
2391    FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
2392 static
2393 XArray* read_dirname_xa (DebugInfo* di, UShort version, const HChar *compdir,
2394                          Cursor *c, const CUConst *cc,
2395                          Bool td3 )
2396 {
2397    XArray*        dirname_xa;   /* xarray of HChar* dirname */
2398    const HChar*   dirname;
2399    UInt           compdir_len;
2400
2401    dirname_xa = VG_(newXA) (ML_(dinfo_zalloc), "di.rdxa.1", ML_(dinfo_free),
2402                             sizeof(HChar*) );
2403
2404    if (compdir == NULL) {
2405       dirname = ".";
2406       compdir_len = 1;
2407    } else {
2408       dirname = compdir;
2409       compdir_len = VG_(strlen)(compdir);
2410    }
2411
2412    /* For version 5, the compdir is the first (zero) entry. */
2413    if (version < 5)
2414       VG_(addToXA) (dirname_xa, &dirname);
2415
2416    if (version < 5) {
2417       TRACE_D3("The Directory Table%s\n",
2418                peek_UChar(c) == 0 ? " is empty." : ":" );
2419
2420       while (peek_UChar(c) != 0) {
2421
2422          DiCursor cur = get_AsciiZ(c);
2423          HChar* data_str = ML_(cur_read_strdup)( cur, "dirname_xa.1" );
2424          TRACE_D3("  %s\n", data_str);
2425
2426          /* If data_str[0] is '/', then 'data' is an absolute path and we
2427             don't mess with it.  Otherwise, construct the
2428             path 'compdir' ++ "/" ++ 'data'. */
2429
2430          if (data_str[0] != '/'
2431              /* not an absolute path */
2432              && compdir
2433              /* actually got something sensible for compdir */
2434              && compdir_len)
2435          {
2436             SizeT  len = compdir_len + 1 + VG_(strlen)(data_str);
2437             HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);
2438
2439             VG_(strcpy)(buf, compdir);
2440             VG_(strcat)(buf, "/");
2441             VG_(strcat)(buf, data_str);
2442
2443             dirname = ML_(addStr)(di, buf, len);
2444             VG_(addToXA) (dirname_xa, &dirname);
2445             if (0) VG_(printf)("rel path  %s\n", buf);
2446             ML_(dinfo_free)(buf);
2447          } else {
2448             /* just use 'data'. */
2449             dirname = ML_(addStr)(di,data_str,-1);
2450             VG_(addToXA) (dirname_xa, &dirname);
2451             if (0) VG_(printf)("abs path  %s\n", data_str);
2452          }
2453
2454          ML_(dinfo_free)(data_str);
2455       }
2456    } else {
2457       UChar forms[256];
2458       UChar p_ndx = 0;
2459       UInt directories_count;
2460       UChar directory_entry_format_count;
2461       UInt n;
2462       DiSlice debugstr_img = cc->escn_debug_str;
2463       DiSlice debuglinestr_img = cc->escn_debug_line_str;
2464
2465       directory_entry_format_count = get_UChar(c);
2466       for (n = 0; n < directory_entry_format_count; n++) {
2467          UInt lnct = get_ULEB128(c);
2468          UInt form = get_ULEB128(c);
2469          if (lnct == DW_LNCT_path)
2470             p_ndx = n;
2471          forms[n] = form;
2472       }
2473       directories_count = get_ULEB128(c);
2474       TRACE_D3("The Directory Table%s\n",
2475                directories_count == 0 ? " is empty." : ":" );
2476
2477       for (n = 0; n < directories_count; n++) {
2478          UInt f;
2479          for (f = 0; f < directory_entry_format_count; f++) {
2480             UInt form = forms[f];
2481             if (f == p_ndx) {
2482                HChar *data_str = get_line_str (di, cc->is_dw64, c, form,
2483                                                debugstr_img,
2484                                                debuglinestr_img);
2485                TRACE_D3("  %s\n", data_str);
2486
2487                /* If data_str[0] is '/', then 'data' is an absolute path and we
2488                   don't mess with it.  Otherwise, construct the
2489                   path 'compdir' ++ "/" ++ 'data'. */
2490
2491                if (data_str[0] != '/'
2492                    /* not an absolute path */
2493                    && compdir
2494                    /* actually got something sensible for compdir */
2495                    && compdir_len)
2496                {
2497                   SizeT  len = compdir_len + 1 + VG_(strlen)(data_str);
2498                   HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);
2499
2500                   VG_(strcpy)(buf, compdir);
2501                   VG_(strcat)(buf, "/");
2502                   VG_(strcat)(buf, data_str);
2503
2504                   dirname = ML_(addStr)(di, buf, len);
2505                   VG_(addToXA) (dirname_xa, &dirname);
2506                   if (0) VG_(printf)("rel path  %s\n", buf);
2507                   ML_(dinfo_free)(buf);
2508                } else {
2509                   /* just use 'data'. */
2510                   dirname = ML_(addStr)(di,data_str,-1);
2511                   VG_(addToXA) (dirname_xa, &dirname);
2512                   if (0) VG_(printf)("abs path  %s\n", data_str);
2513                }
2514
2515                ML_(dinfo_free)(data_str);
2516             } else {
2517                skip_line_form (di, cc->is_dw64, c, form);
2518             }
2519          }
2520       }
2521    }
2522
2523    TRACE_D3 ("\n");
2524
2525    if (version < 5 && get_UChar (c) != 0) {
2526       ML_(symerr)(NULL, True,
2527                   "could not get NUL at end of DWARF directory table");
2528       VG_(deleteXA)(dirname_xa);
2529       return NULL;
2530    }
2531
2532    return dirname_xa;
2533 }
2534
2535 static
2536 void read_filename_table( /*MOD*/XArray* /* of UInt* */ fndn_ix_Table,
2537                           const HChar* compdir,
2538                           const CUConst* cc, ULong debug_line_offset,
2539                           Bool td3 )
2540 {
2541    Bool   is_dw64;
2542    Cursor c;
2543    Word   i;
2544    UShort version;
2545    UChar  opcode_base;
2546    const HChar* str;
2547    XArray* dirname_xa;   /* xarray of HChar* dirname */
2548    ULong  dir_xa_ix;     /* Index in dirname_xa, as read from dwarf info. */
2549    const HChar* dirname;
2550    UInt   fndn_ix;
2551
2552    vg_assert(fndn_ix_Table && cc && cc->barf);
2553    if (!ML_(sli_is_valid)(cc->escn_debug_line)
2554        || cc->escn_debug_line.szB <= debug_line_offset) {
2555       cc->barf("read_filename_table: .debug_line is missing?");
2556    }
2557
2558    init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
2559                 "Overrun whilst reading .debug_line section(1)" );
2560
2561    /* unit_length = */
2562    get_Initial_Length( &is_dw64, &c,
2563                        "read_filename_table: invalid initial-length field" );
2564    version = get_UShort( &c );
2565    if (version != 2 && version != 3 && version != 4 && version != 5)
2566      cc->barf("read_filename_table: Only DWARF version 2, 3, 4 and 5 "
2567               "line info is currently supported.");
2568    if (version >= 5) {
2569       /* addrs_size = */ get_UChar( &c );
2570       /* seg_size =   */ get_UChar( &c );
2571    }
2572    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
2573    /*minimum_instruction_length = */ get_UChar( &c );
2574    if (version >= 4)
2575       /*maximum_operations_per_insn = */ get_UChar( &c );
2576    /*default_is_stmt            = */ get_UChar( &c );
2577    /*line_base                  = (Char)*/ get_UChar( &c );
2578    /*line_range                 = */ get_UChar( &c );
2579    opcode_base                = get_UChar( &c );
2580    /* skip over "standard_opcode_lengths" */
2581    for (i = 1; i < (Word)opcode_base; i++)
2582      (void)get_UChar( &c );
2583
2584    dirname_xa = read_dirname_xa(cc->di, version, compdir, &c, cc, td3);
2585
2586    /* Read and record the file names table */
2587    vg_assert( VG_(sizeXA)( fndn_ix_Table ) == 0 );
2588    if (version < 5) {
2589       /* Add a dummy index-zero entry.  DWARF3 numbers its files
2590          from 1, for some reason. */
2591       fndn_ix = ML_(addFnDn) ( cc->di, "<unknown_file>", NULL );
2592       VG_(addToXA)( fndn_ix_Table, &fndn_ix );
2593       while (peek_UChar(&c) != 0) {
2594          DiCursor cur = get_AsciiZ(&c);
2595          str = ML_(addStrFromCursor)( cc->di, cur );
2596          dir_xa_ix = get_ULEB128( &c );
2597          if (dirname_xa != NULL && dir_xa_ix < VG_(sizeXA) (dirname_xa))
2598             dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
2599          else
2600             dirname = NULL;
2601          fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
2602          TRACE_D3("  read_filename_table: %ld fndn_ix %u %s %s\n",
2603                   VG_(sizeXA)(fndn_ix_Table), fndn_ix,
2604                   dirname, str);
2605          VG_(addToXA)( fndn_ix_Table, &fndn_ix );
2606          (void)get_ULEB128( &c ); /* skip last mod time */
2607          (void)get_ULEB128( &c ); /* file size */
2608       }
2609    } else {
2610       UChar forms[256];
2611       UChar p_ndx = 0, d_ndx = 0;
2612       UInt file_names_count;
2613       UChar file_names_entry_format_count;
2614       UInt n;
2615       DiSlice debugstr_img = cc->escn_debug_str;
2616       DiSlice debuglinestr_img = cc->escn_debug_line_str;
2617       file_names_entry_format_count = get_UChar( &c );
2618       for (n = 0; n < file_names_entry_format_count; n++) {
2619          UInt lnct = get_ULEB128( &c );
2620          UInt form = get_ULEB128( &c );
2621          if (lnct == DW_LNCT_path)
2622             p_ndx = n;
2623          if (lnct == DW_LNCT_directory_index)
2624             d_ndx = n;
2625          forms[n] = form;
2626       }
2627       file_names_count = get_ULEB128( &c );
2628       for (n = 0; n < file_names_count; n++) {
2629          UInt f;
2630          dir_xa_ix  = 0;
2631          str = NULL;
2632          for (f = 0; f < file_names_entry_format_count; f++) {
2633             UInt form = forms[f];
2634             if (f == p_ndx)
2635                str = get_line_str (cc->di, cc->is_dw64, &c, form,
2636                                    debugstr_img, debuglinestr_img);
2637             else if (f == d_ndx)
2638                dir_xa_ix = get_line_ndx (cc->di, &c, form);
2639             else
2640                skip_line_form (cc->di, cc->is_dw64, &c, form);
2641          }
2642
2643          if (dirname_xa != NULL
2644              && dir_xa_ix >= 0 && dir_xa_ix < VG_(sizeXA) (dirname_xa))
2645             dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
2646          else
2647             dirname = NULL;
2648          fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
2649          TRACE_D3("  read_filename_table: %ld fndn_ix %u %s %s\n",
2650                   VG_(sizeXA)(fndn_ix_Table), fndn_ix,
2651                   dirname, str);
2652          VG_(addToXA)( fndn_ix_Table, &fndn_ix );
2653       }
2654    }
2655    /* We're done!  The rest of it is not interesting. */
2656    if (dirname_xa != NULL)
2657       VG_(deleteXA)(dirname_xa);
2658 }
2659
2660 /* setup_cu_svma to be called when a cu is found at level 0,
2661    to establish the cu_svma. */
2662 static void setup_cu_svma(CUConst* cc, Bool have_lo, Addr ip_lo, Bool td3)
2663 {
2664    Addr cu_svma;
2665    /* We have potentially more than one type of parser parsing the
2666       dwarf information. At least currently, each parser establishes
2667       the cu_svma. So, in case cu_svma_known, we check that the same
2668       result is obtained by the 2nd parsing of the cu.
2669
2670       Alternatively, we could reset cu_svma_known after each parsing
2671       and then check that we only see a single DW_TAG_compile_unit DIE
2672       at level 0, DWARF3 only allows exactly one top level DIE per
2673       CU. */
2674
2675    if (have_lo)
2676       cu_svma = ip_lo;
2677    else {
2678       /* Now, it may be that this DIE doesn't tell us the CU's
2679          SVMA, by way of not having a DW_AT_low_pc.  That's OK --
2680          the CU doesn't *have* to have its SVMA specified.
2681
2682          But as per last para D3 spec sec 3.1.1 ("Normal and
2683          Partial Compilation Unit Entries", "If the base address
2684          (viz, the SVMA) is undefined, then any DWARF entry of
2685          structure defined interms of the base address of that
2686          compilation unit is not valid.".  So that means, if whilst
2687          processing the children of this top level DIE (or their
2688          children, etc) we see a DW_AT_range, and cu_svma_known is
2689          False, then the DIE that contains it is (per the spec)
2690          invalid, and we can legitimately stop and complain. */
2691       /* .. whereas The Reality is, simply assume the SVMA is zero
2692          if it isn't specified. */
2693       cu_svma = 0;
2694    }
2695
2696    if (cc->cu_svma_known) {
2697       vg_assert (cu_svma == cc->cu_svma);
2698    } else {
2699       cc->cu_svma_known = True;
2700       cc->cu_svma = cu_svma;
2701       if (0)
2702          TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc->cu_svma);
2703    }
2704 }
2705
2706 /* Setup info from DW_AT_addr_base, DW_AT_str_offsets_base, DW_AT_rnglists_base
2707    and DW_AT_loclists_base. This needs to be done early, because other DW_AT_*
2708    info may depend on it. */
2709 static void setup_cu_bases(CUConst* cc, Cursor* c_die, const g_abbv* abbv)
2710 {
2711    FormContents cts;
2712    UInt nf_i;
2713    ULong saved_c_pos;
2714    if(cc->cu_has_addr_base && cc->cu_has_str_offsets_base
2715       && cc->cu_has_rnglists_base && cc->cu_has_loclists_base)
2716       return;
2717    saved_c_pos = get_position_of_Cursor(c_die);
2718    nf_i = 0;
2719    while (!cc->cu_has_addr_base || !cc->cu_has_str_offsets_base
2720       || !cc->cu_has_rnglists_base || !cc->cu_has_loclists_base) {
2721       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2722       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2723       const name_form *nf = &abbv->nf[nf_i];
2724       if (attr == 0 && form == 0)
2725          break;
2726       if (attr != DW_AT_addr_base && attr != DW_AT_str_offsets_base
2727           && attr != DW_AT_rnglists_base && attr != DW_AT_loclists_base) {
2728          const UInt form_szB = get_Form_szB (cc, form);
2729          if (form_szB == VARSZ_FORM) {
2730             if(form == DW_FORM_addrx || form == DW_FORM_strx
2731                || form == DW_FORM_rnglistx || form == DW_FORM_loclistx) {
2732                /* Skip without interpreting them, they depend on *_base. */
2733                (void) get_ULEB128(c_die);
2734             } else {
2735                /* Need to read the contents of this one to skip it. */
2736                get_Form_contents( &cts, cc, c_die, False /*td3*/,
2737                                   &abbv->nf[nf_i] );
2738             }
2739          } else {
2740             /* Skip without even reading it, as it may depend on *_base. */
2741             advance_position_of_Cursor (c_die, form_szB);
2742          }
2743          nf_i++;
2744          continue;
2745       }
2746       get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
2747       if (attr == DW_AT_addr_base && cts.szB > 0) {
2748          Addr addr_base = cts.u.val;
2749          if (cc->cu_has_addr_base)
2750             vg_assert (addr_base == cc->cu_addr_base);
2751          else {
2752             cc->cu_has_addr_base = True;
2753             cc->cu_addr_base = addr_base;
2754          }
2755       }
2756       if (attr == DW_AT_str_offsets_base && cts.szB > 0) {
2757          Addr str_offsets_base = cts.u.val;
2758          if (cc->cu_has_str_offsets_base)
2759             vg_assert (str_offsets_base == cc->cu_str_offsets_base);
2760          else {
2761             cc->cu_has_str_offsets_base = True;
2762             cc->cu_str_offsets_base = str_offsets_base;
2763          }
2764        }
2765       if (attr == DW_AT_rnglists_base && cts.szB > 0) {
2766          Addr rnglists_base = cts.u.val;
2767          if (cc->cu_has_rnglists_base)
2768             vg_assert (rnglists_base == cc->cu_rnglists_base);
2769          else {
2770             cc->cu_has_rnglists_base = True;
2771             cc->cu_rnglists_base = rnglists_base;
2772          }
2773        }
2774       if (attr == DW_AT_loclists_base && cts.szB > 0) {
2775          Addr loclists_base = cts.u.val;
2776          if (cc->cu_has_loclists_base)
2777             vg_assert (loclists_base == cc->cu_loclists_base);
2778          else {
2779             cc->cu_has_loclists_base = True;
2780             cc->cu_loclists_base = loclists_base;
2781          }
2782        }
2783        nf_i++;
2784    }
2785    set_position_of_Cursor(c_die, saved_c_pos);
2786 }
2787
2788 static void trace_DIE(
2789    DW_TAG dtag,
2790    UWord posn,
2791    Int level,
2792    UWord saved_die_c_offset,
2793    const g_abbv *abbv,
2794    const CUConst* cc)
2795 {
2796    Cursor c;
2797    FormContents cts;
2798    UWord sibling = 0;
2799    UInt nf_i;
2800    Bool  debug_types_flag;
2801    Bool  alt_flag;
2802    Cursor check_skip;
2803    UWord check_sibling = 0;
2804
2805    posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
2806    init_Cursor (&c,
2807                 debug_types_flag ? cc->escn_debug_types :
2808                 alt_flag ? cc->escn_debug_info_alt : cc->escn_debug_info,
2809                 saved_die_c_offset, cc->barf,
2810                 "Overrun trace_DIE");
2811    check_skip = c;
2812    VG_(printf)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
2813                level, posn, (ULong) abbv->abbv_code, ML_(pp_DW_TAG)( dtag ),
2814                debug_types_flag ? " (in .debug_types)" : "",
2815                alt_flag ? " (in alternate .debug_info)" : "");
2816    nf_i = 0;
2817    while (True) {
2818       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2819       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2820       const name_form *nf = &abbv->nf[nf_i];
2821       nf_i++;
2822       if (attr == 0 && form == 0) break;
2823       VG_(printf)("     %-18s: ", ML_(pp_DW_AT)(attr));
2824       /* Get the form contents, so as to print them */
2825       get_Form_contents( &cts, cc, &c, True, nf );
2826       if (attr == DW_AT_sibling && cts.szB > 0) {
2827          sibling = cts.u.val;
2828       }
2829       VG_(printf)("\t\n");
2830    }
2831
2832    /* Verify that skipping a DIE gives the same displacement as
2833       tracing (i.e. reading) a DIE. If there is an inconsistency in
2834       the nr of bytes read by get_Form_contents and get_Form_szB, this
2835       should be detected by the below. Using --trace-symtab=yes
2836       --read-var-info=yes will ensure all DIEs are systematically
2837       verified. */
2838    skip_DIE (&check_sibling, &check_skip, abbv, cc);
2839    vg_assert (check_sibling == sibling);
2840    vg_assert (get_position_of_Cursor (&check_skip)
2841               == get_position_of_Cursor (&c));
2842 }
2843
2844 __attribute__((noreturn))
2845 static void dump_bad_die_and_barf(
2846    const HChar *whichparser,
2847    DW_TAG dtag,
2848    UWord posn,
2849    Int level,
2850    Cursor* c_die,
2851    UWord saved_die_c_offset,
2852    const g_abbv *abbv,
2853    const CUConst* cc)
2854 {
2855    trace_DIE (dtag, posn, level, saved_die_c_offset, abbv, cc);
2856    VG_(printf)("%s:\n", whichparser);
2857    cc->barf("confused by the above DIE");
2858 }
2859
2860 __attribute__((noinline))
2861 static void bad_DIE_confusion(int linenr)
2862 {
2863    VG_(printf)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr);
2864 }
2865 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
2866
2867 /* Reset the fndn_ix_Table.  When we come across the top level DIE for a CU we
2868    will copy all the file names out of the .debug_line img area and use this
2869    table to look up the copies when we later see filename numbers in
2870    DW_TAG_variables etc. The table can be be reused between parsers (var and
2871    inline) and between CUs. So we keep a copy of the last one parsed. Call
2872    reset_fndn_ix_table before reading a new one from a new offset.  */
2873 static
2874 void reset_fndn_ix_table (XArray** fndn_ix_Table, ULong *debug_line_offset,
2875                           ULong new_offset)
2876 {
2877    vg_assert (new_offset == -1
2878               || *debug_line_offset != new_offset);
2879    Int size = *fndn_ix_Table == NULL ? 0 : VG_(sizeXA) (*fndn_ix_Table);
2880    if (size > 0) {
2881       VG_(deleteXA) (*fndn_ix_Table);
2882       *fndn_ix_Table = NULL;
2883    }
2884    if (*fndn_ix_Table == NULL)
2885       *fndn_ix_Table = VG_(newXA)( ML_(dinfo_zalloc),
2886                                    "di.readdwarf3.reset_ix_table",
2887                                    ML_(dinfo_free),
2888                                    sizeof(UInt) );
2889    *debug_line_offset = new_offset;
2890 }
2891
2892 __attribute__((noinline))
2893 static void parse_var_DIE (
2894    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
2895    /*MOD*/XArray* /* of TempVar* */ tempvars,
2896    /*MOD*/XArray* /* of GExpr* */ gexprs,
2897    /*MOD*/D3VarParser* parser,
2898    XArray** fndn_ix_Table,
2899    ULong *debug_line_offset,
2900    DW_TAG dtag,
2901    UWord posn,
2902    Int level,
2903    Cursor* c_die,
2904    const g_abbv *abbv,
2905    CUConst* cc,
2906    Bool td3
2907 )
2908 {
2909    FormContents cts;
2910    UInt nf_i;
2911
2912    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2913
2914    varstack_preen( parser, td3, level-1 );
2915
2916    if (dtag == DW_TAG_compile_unit
2917        || dtag == DW_TAG_type_unit
2918        || dtag == DW_TAG_partial_unit
2919        || dtag == DW_TAG_skeleton_unit) {
2920       Bool have_lo    = False;
2921       Bool have_hi1   = False;
2922       Bool hiIsRelative = False;
2923       Bool have_range = False;
2924       Addr ip_lo    = 0;
2925       Addr ip_hi1   = 0;
2926       Addr rangeoff = 0;
2927       const HChar *compdir = NULL;
2928
2929       if (level == 0)
2930          setup_cu_bases(cc, c_die, abbv);
2931       nf_i = 0;
2932       while (True) {
2933          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2934          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2935          const name_form *nf = &abbv->nf[nf_i];
2936          nf_i++;
2937          if (attr == 0 && form == 0) break;
2938          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
2939          if (attr == DW_AT_low_pc && cts.szB > 0) {
2940             ip_lo   = cts.u.val;
2941             have_lo = True;
2942          }
2943          if (attr == DW_AT_high_pc && cts.szB > 0) {
2944             ip_hi1   = cts.u.val;
2945             have_hi1 = True;
2946             if (form != DW_FORM_addr)
2947                hiIsRelative = True;
2948          }
2949          if (attr == DW_AT_ranges && cts.szB > 0) {
2950             rangeoff   = cts.u.val;
2951             have_range = True;
2952          }
2953          if (attr == DW_AT_comp_dir) {
2954             if (cts.szB >= 0)
2955                cc->barf("parse_var_DIE compdir: expecting indirect string");
2956             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2957                                                "parse_var_DIE.compdir" );
2958             compdir = ML_(addStr)(cc->di, str, -1);
2959             ML_(dinfo_free) (str);
2960          }
2961          if (attr == DW_AT_stmt_list && cts.szB > 0) {
2962             if (cts.u.val != *debug_line_offset) {
2963                reset_fndn_ix_table( fndn_ix_Table, debug_line_offset,
2964                                     cts.u.val );
2965                read_filename_table( *fndn_ix_Table, compdir,
2966                                     cc, cts.u.val, td3 );
2967             }
2968          }
2969       }
2970       if (have_lo && have_hi1 && hiIsRelative)
2971          ip_hi1 += ip_lo;
2972
2973       /* Now, does this give us an opportunity to find this
2974          CU's svma? */
2975       if (level == 0)
2976          setup_cu_svma(cc, have_lo, ip_lo, td3);
2977
2978       /* Do we have something that looks sane? */
2979       if (have_lo && have_hi1 && (!have_range)) {
2980          if (ip_lo < ip_hi1)
2981             varstack_push( cc, parser, td3,
2982                            unitary_range_list(ip_lo, ip_hi1 - 1),
2983                            level,
2984                            False/*isFunc*/, NULL/*fbGX*/ );
2985          else if (ip_lo == 0 && ip_hi1 == 0)
2986             /* CU has no code, presumably?
2987                Such situations have been encountered for code
2988                compiled with -ffunction-sections -fdata-sections
2989                and linked with --gc-sections. Completely
2990                eliminated CU gives such 0 lo/hi pc. Similarly
2991                to a CU which has no lo/hi/range pc, we push
2992                an empty range list. */
2993             varstack_push( cc, parser, td3,
2994                            empty_range_list(),
2995                            level,
2996                            False/*isFunc*/, NULL/*fbGX*/ );
2997       } else
2998       if ((!have_lo) && (!have_hi1) && have_range) {
2999          varstack_push( cc, parser, td3,
3000                         get_range_list( cc, td3,
3001                                         rangeoff, cc->cu_svma ),
3002                         level,
3003                         False/*isFunc*/, NULL/*fbGX*/ );
3004       } else
3005       if ((!have_lo) && (!have_hi1) && (!have_range)) {
3006          /* CU has no code, presumably? */
3007          varstack_push( cc, parser, td3,
3008                         empty_range_list(),
3009                         level,
3010                         False/*isFunc*/, NULL/*fbGX*/ );
3011       } else
3012       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
3013          /* broken DIE created by gcc-4.3.X ?  Ignore the
3014             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
3015             instead. */
3016          varstack_push( cc, parser, td3,
3017                         get_range_list( cc, td3,
3018                                         rangeoff, cc->cu_svma ),
3019                         level,
3020                         False/*isFunc*/, NULL/*fbGX*/ );
3021       } else {
3022          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
3023                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
3024          goto_bad_DIE;
3025       }
3026    }
3027
3028    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
3029       Bool   have_lo    = False;
3030       Bool   have_hi1   = False;
3031       Bool   have_range = False;
3032       Bool   hiIsRelative = False;
3033       Addr   ip_lo      = 0;
3034       Addr   ip_hi1     = 0;
3035       Addr   rangeoff   = 0;
3036       Bool   isFunc     = dtag == DW_TAG_subprogram;
3037       GExpr* fbGX       = NULL;
3038       nf_i = 0;
3039       while (True) {
3040          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3041          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3042          const name_form *nf = &abbv->nf[nf_i];
3043          nf_i++;
3044          if (attr == 0 && form == 0) break;
3045          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3046          if (attr == DW_AT_low_pc && cts.szB > 0) {
3047             ip_lo   = cts.u.val;
3048             have_lo = True;
3049          }
3050          if (attr == DW_AT_high_pc && cts.szB > 0) {
3051             ip_hi1   = cts.u.val;
3052             have_hi1 = True;
3053             if (form != DW_FORM_addr)
3054                hiIsRelative = True;
3055          }
3056          if (attr == DW_AT_ranges && cts.szB > 0) {
3057             rangeoff   = cts.u.val;
3058             have_range = True;
3059          }
3060          if (isFunc
3061              && attr == DW_AT_frame_base
3062              && cts.szB != 0 /* either scalar or nonempty block */) {
3063             fbGX = get_GX( cc, False/*td3*/, &cts );
3064             vg_assert(fbGX);
3065             VG_(addToXA)(gexprs, &fbGX);
3066          }
3067       }
3068       if (have_lo && have_hi1 && hiIsRelative)
3069          ip_hi1 += ip_lo;
3070       /* Do we have something that looks sane? */
3071       if (dtag == DW_TAG_subprogram
3072           && (!have_lo) && (!have_hi1) && (!have_range)) {
3073          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
3074             representing a subroutine declaration that is not also a
3075             definition does not have code address or range
3076             attributes." */
3077       } else
3078       if (dtag == DW_TAG_lexical_block
3079           && (!have_lo) && (!have_hi1) && (!have_range)) {
3080          /* I believe this is legit, and means the lexical block
3081             contains no insns (whatever that might mean).  Ignore. */
3082       } else
3083       if (have_lo && have_hi1 && (!have_range)) {
3084          /* This scope supplies just a single address range. */
3085          if (ip_lo < ip_hi1)
3086             varstack_push( cc, parser, td3,
3087                            unitary_range_list(ip_lo, ip_hi1 - 1),
3088                            level, isFunc, fbGX );
3089       } else
3090       if ((!have_lo) && (!have_hi1) && have_range) {
3091          /* This scope supplies multiple address ranges via the use of
3092             a range list. */
3093          varstack_push( cc, parser, td3,
3094                         get_range_list( cc, td3,
3095                                         rangeoff, cc->cu_svma ),
3096                         level, isFunc, fbGX );
3097       } else
3098       if (have_lo && (!have_hi1) && (!have_range)) {
3099          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
3100             Entries) says fairly clearly that a scope must have either
3101             _range or (_low_pc and _high_pc). */
3102          /* The spec is a bit ambiguous though.  Perhaps a single byte
3103             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
3104          /* This case is here because icc9 produced this:
3105          <2><13bd>: DW_TAG_lexical_block
3106             DW_AT_decl_line   : 5229
3107             DW_AT_decl_column : 37
3108             DW_AT_decl_file   : 1
3109             DW_AT_low_pc      : 0x401b03
3110          */
         /* Ignore (seems safer than pushing a single byte range) */
3112       } else
3113          goto_bad_DIE;
3114    }
3115
3116    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
3117       const  HChar* name = NULL;
3118       UWord  typeR       = D3_INVALID_CUOFF;
3119       Bool   global      = False;
3120       GExpr* gexpr       = NULL;
3121       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
3122       Int    lineNo      = 0;
3123       UInt   fndn_ix     = 0;
3124       nf_i = 0;
3125       while (True) {
3126          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3127          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3128          const name_form *nf = &abbv->nf[nf_i];
3129          nf_i++;
3130          if (attr == 0 && form == 0) break;
3131          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3132          if (attr == DW_AT_name && cts.szB < 0) {
3133             name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
3134          }
3135          if (attr == DW_AT_location
3136              && cts.szB != 0 /* either scalar or nonempty block */) {
3137             gexpr = get_GX( cc, False/*td3*/, &cts );
3138             vg_assert(gexpr);
3139             VG_(addToXA)(gexprs, &gexpr);
3140          }
3141          if (attr == DW_AT_type && cts.szB > 0) {
3142             typeR = cook_die_using_form( cc, cts.u.val, form );
3143          }
3144          if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
3145             global = True;
3146          }
3147          if (attr == DW_AT_abstract_origin && cts.szB > 0) {
3148             abs_ori = (UWord)cts.u.val;
3149          }
3150          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
3151             /*declaration = True;*/
3152          }
3153          if (attr == DW_AT_decl_line && cts.szB > 0) {
3154             lineNo = (Int)cts.u.val;
3155          }
3156          if (attr == DW_AT_decl_file && cts.szB > 0) {
3157             Int ftabIx = (Int)cts.u.val;
3158             if (ftabIx >= 1
3159                 && ftabIx < VG_(sizeXA)( *fndn_ix_Table )) {
3160                fndn_ix = *(UInt*)VG_(indexXA)( *fndn_ix_Table, ftabIx );
3161             }
3162             if (0) VG_(printf)("XXX filename fndn_ix = %u %s\n", fndn_ix,
3163                                ML_(fndn_ix2filename) (cc->di, fndn_ix));
3164          }
3165       }
3166       if (!global && dtag == DW_TAG_variable && level == 1) {
3167          /* Case of a static variable. It is better to declare
3168             it global as the variable is not really related to
3169             a PC range, as its address can be used by program
3170             counters outside of the ranges where it is visible . */
3171          global = True;
3172       }
3173
      /* We'll collect it if one of the following three
3175          conditions holds:
3176          (1) has location and type    -> completed
3177          (2) has type only            -> is an abstract instance
3178          (3) has location and abs_ori -> is a concrete instance
3179          Name, fndn_ix and line number are all optional frills.
3180       */
3181       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
3182            /* 2 */ || (typeR != D3_INVALID_CUOFF)
3183            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
3184
3185          /* Add this variable to the list of interesting looking
3186             variables.  Crucially, note along with it the address
3187             range(s) associated with the variable, which for locals
3188             will be the address ranges at the top of the varparser's
3189             stack. */
3190          GExpr*   fbGX = NULL;
3191          Word     i, nRanges;
3192          const XArray*  /* of AddrRange */ xa;
3193          TempVar* tv;
3194          /* Stack can't be empty; we put a dummy entry on it for the
3195             entire address range before starting with the DIEs for
3196             this CU. */
3197          vg_assert(parser->sp >= 0);
3198
3199          /* If this is a local variable (non-global), try to find
3200             the GExpr for the DW_AT_frame_base of the containing
3201             function.  It should have been pushed on the stack at the
3202             time we encountered its DW_TAG_subprogram DIE, so the way
3203             to find it is to scan back down the stack looking for it.
3204             If there isn't an enclosing stack entry marked 'isFunc'
3205             then we must be seeing variable or formal param DIEs
3206             outside of a function, so we deem the Dwarf to be
3207             malformed if that happens.  Note that the fbGX may be NULL
            if the containing DW_TAG_subprogram didn't supply a
3209             DW_AT_frame_base -- that's OK, but there must actually be
3210             a containing DW_TAG_subprogram. */
3211          if (!global) {
3212             Bool found = False;
3213             for (i = parser->sp; i >= 0; i--) {
3214                if (parser->isFunc[i]) {
3215                   fbGX = parser->fbGX[i];
3216                   found = True;
3217                   break;
3218                }
3219             }
3220             if (!found) {
3221                if (0 && VG_(clo_verbosity) >= 0) {
3222                   VG_(message)(Vg_DebugMsg,
3223                      "warning: parse_var_DIE: non-global variable "
3224                      "outside DW_TAG_subprogram\n");
3225                }
3226                /* goto_bad_DIE; */
3227                /* This seems to happen a lot.  Just ignore it -- if,
3228                   when we come to evaluation of the location (guarded)
3229                   expression, it requires a frame base value, and
3230                   there's no expression for that, then evaluation as a
3231                   whole will fail.  Harmless - a bit of a waste of
3232                   cycles but nothing more. */
3233             }
3234          }
3235
3236          /* re "global ? 0 : parser->sp" (twice), if the var is
3237             marked 'global' then we must put it at the global scope,
3238             as only the global scope (level 0) covers the entire PC
3239             address space.  It is asserted elsewhere that level 0
3240             always covers the entire address space. */
3241          xa = parser->ranges[global ? 0 : parser->sp];
3242          nRanges = VG_(sizeXA)(xa);
3243          vg_assert(nRanges >= 0);
3244
3245          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
3246          tv->name   = name;
3247          tv->level  = global ? 0 : parser->sp;
3248          tv->typeR  = typeR;
3249          tv->gexpr  = gexpr;
3250          tv->fbGX   = fbGX;
3251          tv->fndn_ix= fndn_ix;
3252          tv->fLine  = lineNo;
3253          tv->dioff  = posn;
3254          tv->absOri = abs_ori;
3255
3256          /* See explanation on definition of type TempVar for the
3257             reason for this elaboration. */
3258          tv->nRanges = nRanges;
3259          tv->rngOneMin = 0;
3260          tv->rngOneMax = 0;
3261          tv->rngMany = NULL;
3262          if (nRanges == 1) {
3263             AddrRange* range = VG_(indexXA)(xa, 0);
3264             tv->rngOneMin = range->aMin;
3265             tv->rngOneMax = range->aMax;
3266          }
3267          else if (nRanges > 1) {
3268             /* See if we already have a range list which is
3269                structurally identical.  If so, use that; if not, clone
3270                this one, and add it to our collection. */
3271             UWord keyW, valW;
3272             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
3273                XArray* old = (XArray*)keyW;
3274                vg_assert(valW == 0);
3275                vg_assert(old != xa);
3276                tv->rngMany = old;
3277             } else {
3278                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
3279                tv->rngMany = cloned;
3280                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
3281             }
3282          }
3283
3284          VG_(addToXA)( tempvars, &tv );
3285
3286          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
3287                   VG_(sizeXA)(xa) );
3288          /* collect stats on how effective the ->ranges special
3289             casing is */
3290          if (0) {
3291             static Int ntot=0, ngt=0;
3292             ntot++;
3293             if (tv->rngMany) ngt++;
3294             if (0 == (ntot % 100000))
3295                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
3296          }
3297
3298       }
3299
3300       /* Here are some other weird cases seen in the wild:
3301
3302             We have a variable with a name and a type, but no
3303             location.  I guess that's a sign that it has been
3304             optimised away.  Ignore it.  Here's an example:
3305
3306             static Int lc_compar(void* n1, void* n2) {
3307                MC_Chunk* mc1 = *(MC_Chunk**)n1;
3308                MC_Chunk* mc2 = *(MC_Chunk**)n2;
3309                return (mc1->data < mc2->data ? -1 : 1);
3310             }
3311
3312             Both mc1 and mc2 are like this
3313             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
3314                 DW_AT_name        : mc1
3315                 DW_AT_decl_file   : 1
3316                 DW_AT_decl_line   : 216
3317                 DW_AT_type        : <5d3>
3318
3319             whereas n1 and n2 do have locations specified.
3320
3321             ---------------------------------------------
3322
3323             We see a DW_TAG_formal_parameter with a type, but
3324             no name and no location.  It's probably part of a function type
3325             construction, thusly, hence ignore it:
3326          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
3327              DW_AT_sibling     : <2c9>
3328              DW_AT_prototyped  : 1
3329              DW_AT_type        : <114>
3330          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
3331              DW_AT_type        : <13e>
3332          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
3333              DW_AT_type        : <133>
3334
3335             ---------------------------------------------
3336
3337             Is very minimal, like this:
3338             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
3339                 DW_AT_abstract_origin: <7ba>
3340             What that signifies I have no idea.  Ignore.
3341
3342             ----------------------------------------------
3343
3344             Is very minimal, like this:
3345             <200f>: DW_TAG_formal_parameter
3346                 DW_AT_abstract_ori: <1f4c>
3347                 DW_AT_location    : 13440
3348             What that signifies I have no idea.  Ignore.
3349             It might be significant, though: the variable at least
3350             has a location and so might exist somewhere.
3351             Maybe we should handle this.
3352
3353             ---------------------------------------------
3354
3355             <22407>: DW_TAG_variable
3356               DW_AT_name        : (indirect string, offset: 0x6579):
3357                                   vgPlain_trampoline_stuff_start
3358               DW_AT_decl_file   : 29
3359               DW_AT_decl_line   : 56
3360               DW_AT_external    : 1
3361               DW_AT_declaration : 1
3362
3363             Nameless and typeless variable that has a location?  Who
3364             knows.  Not me.
3365             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
3366                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
3367                                      (DW_OP_addr: 3813c7c0)
3368
3369             No, really.  Check it out.  gcc is quite simply borked.
3370             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
3371             // followed by no attributes, and the next DIE is a sibling,
3372             // not a child
3373             */
3374    }
3375    return;
3376
3377   bad_DIE:
3378    dump_bad_die_and_barf("parse_var_DIE", dtag, posn, level,
3379                          c_die, saved_die_c_offset,
3380                          abbv,
3381                          cc);
3382    /*NOTREACHED*/
3383 }
3384
3385 typedef
3386    struct {
3387       UWord sibling; // sibling of the last read DIE (if it has a sibling).
3388    }
3389    D3InlParser;
3390
3391 /* Return the function name corresponding to absori.
3392
3393    absori is a 'cooked' reference to a DIE, i.e. absori can be either
3394    in cc->escn_debug_info or in cc->escn_debug_info_alt.
3395    get_inlFnName will uncook absori.
3396
3397    The returned value is a (permanent) string in DebugInfo's .strchunks.
3398
3399    LIMITATION: absori must point in the CU of cc. If absori points
3400    in another CU, returns "UnknownInlinedFun".
3401
3402    Here are the problems to retrieve the fun name if absori is in
3403    another CU:  the DIE reading code cannot properly extract data from
3404    another CU, as the abbv code retrieved in the other CU cannot be
3405    translated in an abbreviation. Reading data from the alternate debug
3406    info also gives problems as the string reference is also in the alternate
3407    file, but when reading the alt DIE, the string form is a 'local' string,
3408    but cannot be read in the current CU, but must be read in the alt CU.
3409    See bug 338803 comment#3 and attachment for a failed attempt to handle
3410    these problems (failed because with the patch, only one alt abbrev hash
3411    table is kept, while we must handle all abbreviations in all CUs
3412    referenced by an absori (being a reference to an alt CU, or a previous
3413    or following CU). */
3414 static const HChar* get_inlFnName (Int absori, CUConst* cc, Bool td3)
3415 {
3416    Cursor c;
3417    const g_abbv *abbv;
3418    ULong  atag, abbv_code;
3419    UInt   has_children;
3420    UWord  posn;
3421    Bool type_flag, alt_flag;
3422    const HChar *ret = NULL;
3423    FormContents cts;
3424    UInt nf_i;
3425
3426    /* Some inlined subroutine call dwarf entries do not have the abstract
3427       origin attribute, resulting in absori being 0 (see callers of
3428       get_inlFnName). This is observed at least with gcc 6.3.0 when compiling
3429       valgrind with lto. So, in case we have a 0 absori, do not report an
3430       error, instead, rather return an unknown inlined function. */
3431    if (absori == 0) {
3432       static Bool absori0_reported = False;
3433       if (!absori0_reported && VG_(clo_verbosity) > 1) {
3434          VG_(message)(Vg_DebugMsg,
3435                       "Warning: inlined fn name without absori\n"
3436                       "is shown as UnknownInlinedFun\n");
3437          absori0_reported = True;
3438       }
3439       TRACE_D3(" <get_inlFnName>: absori is not set");
3440       return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
3441    }
3442
3443    posn = uncook_die( cc, absori, &type_flag, &alt_flag);
3444    if (type_flag)
3445       cc->barf("get_inlFnName: uncooked absori in type debug info");
3446
3447    /* LIMITATION: check we are in the same CU.
3448       If not, return unknown inlined function name. */
3449    /* if crossing between alt debug info<>normal info
3450           or posn not in the cu range,
3451       then it is in another CU. */
3452    if (alt_flag != cc->is_alt_info
3453        || posn < cc->cu_start_offset
3454        || posn >= cc->cu_start_offset + cc->unit_length) {
3455       static Bool reported = False;
3456       if (!reported && VG_(clo_verbosity) > 1) {
3457          VG_(message)(Vg_DebugMsg,
3458                       "Warning: cross-CU LIMITATION: some inlined fn names\n"
3459                       "might be shown as UnknownInlinedFun\n");
3460          reported = True;
3461       }
3462       TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn);
3463       return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
3464    }
3465
3466    init_Cursor (&c, cc->escn_debug_info, posn, cc->barf,
3467                 "Overrun get_inlFnName absori");
3468
3469    abbv_code = get_ULEB128( &c );
3470    abbv      = get_abbv ( cc, abbv_code, td3);
3471    atag      = abbv->atag;
3472    TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
3473             posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
3474
3475    if (atag == 0)
3476       cc->barf("get_inlFnName: invalid zero tag on DIE");
3477
3478    has_children = abbv->has_children;
3479    if (has_children != DW_children_no && has_children != DW_children_yes)
3480       cc->barf("get_inlFnName: invalid has_children value");
3481
3482    if (atag != DW_TAG_subprogram)
3483       cc->barf("get_inlFnName: absori not a subprogram");
3484
3485    nf_i = 0;
3486    while (True) {
3487       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3488       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3489       const name_form *nf = &abbv->nf[nf_i];
3490       nf_i++;
3491       if (attr == 0 && form == 0) break;
3492       get_Form_contents( &cts, cc, &c, False/*td3*/, nf );
3493       if (attr == DW_AT_name) {
3494          HChar *fnname;
3495          if (cts.szB >= 0)
3496             cc->barf("get_inlFnName: expecting indirect string");
3497          fnname = ML_(cur_read_strdup)( cts.u.cur,
3498                                         "get_inlFnName.1" );
3499          ret = ML_(addStr)(cc->di, fnname, -1);
3500          ML_(dinfo_free) (fnname);
3501          break; /* Name found, get out of the loop, as this has priority over
3502                  DW_AT_specification. */
3503       }
3504       if (attr == DW_AT_specification) {
3505          UWord cdie;
3506
3507          if (cts.szB == 0)
3508             cc->barf("get_inlFnName: AT specification missing");
3509
3510          /* The recursive call to get_inlFnName will uncook its arg.
3511             So, we need to cook it here, so as to reference the
3512             correct section (e.g. the alt info). */
3513          cdie = cook_die_using_form(cc, (UWord)cts.u.val, form);
3514
3515          /* hoping that there is no loop */
3516          ret = get_inlFnName (cdie, cc, td3);
3517          /* Unclear if having both DW_AT_specification and DW_AT_name is
3518             possible but in any case, we do not break here.
3519             If we find later on a DW_AT_name, it will override the name found
3520             in the DW_AT_specification.*/
3521       }
3522    }
3523
3524    if (ret)
3525       return ret;
3526    else {
3527       TRACE_D3("AbsOriFnNameNotFound");
3528       return ML_(addStr)(cc->di, "AbsOriFnNameNotFound", -1);
3529    }
3530 }
3531
3532 /* Returns True if the (possibly) childrens of the current DIE are interesting
3533    to parse. Returns False otherwise.
3534    If the current DIE has a sibling, the non interesting children can
3535    maybe be skipped (if the DIE has a DW_AT_sibling).  */
3536 __attribute__((noinline))
3537 static Bool parse_inl_DIE (
3538    /*MOD*/D3InlParser* parser,
3539    XArray** fndn_ix_Table,
3540    ULong *debug_line_offset,
3541    DW_TAG dtag,
3542    UWord posn,
3543    Int level,
3544    Cursor* c_die,
3545    const g_abbv *abbv,
3546    CUConst* cc,
3547    Bool td3
3548 )
3549 {
3550    FormContents cts;
3551    UInt nf_i;
3552
3553    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
3554
3555    /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit which in theory
3556       could also contain inlined fn calls, if they cover an address range.  */
3557    Bool unit_has_addrs = False;
3558    if (dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit
3559        || dtag == DW_TAG_skeleton_unit) {
3560       Bool have_lo    = False;
3561       Addr ip_lo    = 0;
3562       const HChar *compdir = NULL;
3563       Bool has_stmt_list = False;
3564       ULong cu_line_offset = 0;
3565
3566       if (level == 0)
3567          setup_cu_bases(cc, c_die, abbv);
3568       nf_i = 0;
3569       while (True) {
3570          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3571          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3572          const name_form *nf = &abbv->nf[nf_i];
3573          nf_i++;
3574          if (attr == 0 && form == 0) break;
3575          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3576          if (attr == DW_AT_low_pc && cts.szB > 0) {
3577             ip_lo   = cts.u.val;
3578             have_lo = True;
3579             unit_has_addrs = True;
3580          }
3581          if (attr == DW_AT_ranges && cts.szB > 0)
3582             unit_has_addrs = True;
3583          if (attr == DW_AT_comp_dir) {
3584             if (cts.szB >= 0)
3585                cc->barf("parse_inl_DIE compdir: expecting indirect string");
3586             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
3587                                                "parse_inl_DIE.compdir" );
3588             compdir = ML_(addStr)(cc->di, str, -1);
3589             ML_(dinfo_free) (str);
3590          }
3591          if (attr == DW_AT_stmt_list && cts.szB > 0) {
3592             has_stmt_list = True;
3593             cu_line_offset = cts.u.val;
3594          }
3595          if (attr == DW_AT_sibling && cts.szB > 0) {
3596             parser->sibling = cts.u.val;
3597          }
3598       }
3599       if (level == 0) {
3600          setup_cu_svma (cc, have_lo, ip_lo, td3);
3601          if (has_stmt_list && unit_has_addrs
3602             && *debug_line_offset != cu_line_offset) {
3603             reset_fndn_ix_table ( fndn_ix_Table, debug_line_offset,
3604                                   cu_line_offset );
3605             read_filename_table( *fndn_ix_Table, compdir,
3606                                  cc, cu_line_offset, td3 );
3607          }
3608       }
3609    }
3610
3611    if (dtag == DW_TAG_inlined_subroutine) {
3612       Bool   have_lo    = False;
3613       Bool   have_hi1   = False;
3614       Bool   have_range = False;
3615       Bool   hiIsRelative = False;
3616       Addr   ip_lo      = 0;
3617       Addr   ip_hi1     = 0;
3618       Addr   rangeoff   = 0;
3619       UInt   caller_fndn_ix = 0;
3620       Int caller_lineno = 0;
3621       Int inlinedfn_abstract_origin = 0;
3622       // 0 will be interpreted as no abstract origin by get_inlFnName
3623
3624       nf_i = 0;
3625       while (True) {
3626          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3627          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3628          const name_form *nf = &abbv->nf[nf_i];
3629          nf_i++;
3630          if (attr == 0 && form == 0) break;
3631          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3632          if (attr == DW_AT_call_file && cts.szB > 0) {
3633             Int ftabIx = (Int)cts.u.val;
3634             if (ftabIx >= 1
3635                 && ftabIx < VG_(sizeXA)( *fndn_ix_Table )) {
3636                caller_fndn_ix = *(UInt*)
3637                           VG_(indexXA)( *fndn_ix_Table, ftabIx );
3638             }
3639             if (0) VG_(printf)("XXX caller_fndn_ix = %u %s\n", caller_fndn_ix,
3640                                ML_(fndn_ix2filename) (cc->di, caller_fndn_ix));
3641          }
3642          if (attr == DW_AT_call_line && cts.szB > 0) {
3643             caller_lineno = cts.u.val;
3644          }
3645
3646          if (attr == DW_AT_abstract_origin  && cts.szB > 0) {
3647             inlinedfn_abstract_origin
3648                = cook_die_using_form (cc, (UWord)cts.u.val, form);
3649          }
3650
3651          if (attr == DW_AT_low_pc && cts.szB > 0) {
3652             ip_lo   = cts.u.val;
3653             have_lo = True;
3654          }
3655          if (attr == DW_AT_high_pc && cts.szB > 0) {
3656             ip_hi1   = cts.u.val;
3657             have_hi1 = True;
3658             if (form != DW_FORM_addr)
3659                hiIsRelative = True;
3660          }
3661          if (attr == DW_AT_ranges && cts.szB > 0) {
3662             rangeoff   = cts.u.val;
3663             have_range = True;
3664          }
3665          if (attr == DW_AT_sibling && cts.szB > 0) {
3666             parser->sibling = cts.u.val;
3667          }
3668       }
3669       if (have_lo && have_hi1 && hiIsRelative)
3670          ip_hi1 += ip_lo;
3671       /* Do we have something that looks sane? */
3672       if (dtag == DW_TAG_inlined_subroutine
3673           && (!have_lo) && (!have_hi1) && (!have_range)) {
3674          /* Seems strange. How can an inlined subroutine have
3675             no code ? */
3676          goto_bad_DIE;
3677       } else
3678       if (have_lo && have_hi1 && (!have_range)) {
3679          /* This inlined call is just a single address range. */
3680          if (ip_lo < ip_hi1) {
3681             /* Apply text debug biasing */
3682             ip_lo += cc->di->text_debug_bias;
3683             ip_hi1 += cc->di->text_debug_bias;
3684             ML_(addInlInfo) (cc->di,
3685                              ip_lo, ip_hi1,
3686                              get_inlFnName (inlinedfn_abstract_origin, cc, td3),
3687                              caller_fndn_ix,
3688                              caller_lineno, level);
3689          }
3690       } else if (have_range) {
3691          /* This inlined call is several address ranges. */
3692          XArray *ranges;
3693          Word j;
3694          const HChar *inlfnname =
3695             get_inlFnName (inlinedfn_abstract_origin, cc, td3);
3696
3697          /* Ranges are biased for the inline info using the same logic
3698             as what is used for biasing ranges for the var info, for which
3699             ranges are read using cc->cu_svma (see parse_var_DIE).
3700             Then text_debug_bias is added when a (non global) var
3701             is recorded (see just before the call to ML_(addVar)) */
3702          ranges = get_range_list( cc, td3,
3703                                   rangeoff, cc->cu_svma );
3704          for (j = 0; j < VG_(sizeXA)( ranges ); j++) {
3705             AddrRange* range = (AddrRange*) VG_(indexXA)( ranges, j );
3706             ML_(addInlInfo) (cc->di,
3707                              range->aMin   + cc->di->text_debug_bias,
3708                              range->aMax+1 + cc->di->text_debug_bias,
3709                              // aMax+1 as range has its last bound included
3710                              // while ML_(addInlInfo) expects last bound not
3711                              // included.
3712                              inlfnname,
3713                              caller_fndn_ix,
3714                              caller_lineno, level);
3715          }
3716          VG_(deleteXA)( ranges );
3717       } else
3718          goto_bad_DIE;
3719    }
3720
3721    // Only recursively parse the (possible) children for the DIE which
3722    // might maybe contain a DW_TAG_inlined_subroutine:
3723    Bool ret = (unit_has_addrs
3724                || dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram
3725                || dtag == DW_TAG_inlined_subroutine || dtag == DW_TAG_namespace);
3726    return ret;
3727
3728   bad_DIE:
3729    dump_bad_die_and_barf("parse_inl_DIE", dtag, posn, level,
3730                          c_die, saved_die_c_offset,
3731                          abbv,
3732                          cc);
3733    /*NOTREACHED*/
3734 }
3735
3736
3737 /*------------------------------------------------------------*/
3738 /*---                                                      ---*/
3739 /*--- Parsing of type-related DIEs                         ---*/
3740 /*---                                                      ---*/
3741 /*------------------------------------------------------------*/
3742
3743 typedef
3744    struct {
3745       /* What source language?  'A'=Ada83/95,
3746                                 'C'=C/C++,
3747                                 'F'=Fortran,
3748                                 '?'=other
3749          Established once per compilation unit. */
3750       UChar language;
3751       /* A stack of types which are currently under construction */
3752       Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
3753                    stack */
3754       Int   stack_size;
3755       /* Note that the TyEnts in qparentE are temporary copies of the
3756          ones accumulating in the main tyent array.  So it is not safe
3757          to free up anything on them when popping them off the stack
3758          (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
3759          memset them to zero when done. */
3760       TyEnt *qparentE; /* parent TyEnts */
3761       Int   *qlevel;
3762    }
3763    D3TypeParser;
3764
3765 /* Completely initialise a type parser object */
3766 static void
3767 type_parser_init ( D3TypeParser *parser )
3768 {
3769    parser->sp = -1;
3770    parser->language = '?';
3771    parser->stack_size = 0;
3772    parser->qparentE = NULL;
3773    parser->qlevel   = NULL;
3774 }
3775
3776 /* Release any memory hanging off a type parser object */
3777 static void
3778 type_parser_release ( D3TypeParser *parser )
3779 {
3780    ML_(dinfo_free)( parser->qparentE );
3781    ML_(dinfo_free)( parser->qlevel );
3782 }
3783
3784 static void typestack_show ( const D3TypeParser* parser, const HChar* str )
3785 {
3786    Word i;
3787    VG_(printf)("  typestack (%s) {\n", str);
3788    for (i = 0; i <= parser->sp; i++) {
3789       VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
3790       ML_(pp_TyEnt)( &parser->qparentE[i] );
3791       VG_(printf)("\n");
3792    }
3793    VG_(printf)("  }\n");
3794 }
3795
3796 /* Remove from the stack, all entries with .level > 'level' */
3797 static
3798 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
3799 {
3800    Bool changed = False;
3801    vg_assert(parser->sp < parser->stack_size);
3802    while (True) {
3803       vg_assert(parser->sp >= -1);
3804       if (parser->sp == -1) break;
3805       if (parser->qlevel[parser->sp] <= level) break;
3806       if (0)
3807          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
3808       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3809       parser->sp--;
3810       changed = True;
3811    }
3812    if (changed && td3)
3813       typestack_show( parser, "after preen" );
3814 }
3815
3816 static Bool typestack_is_empty ( const D3TypeParser* parser )
3817 {
3818    vg_assert(parser->sp >= -1 && parser->sp < parser->stack_size);
3819    return parser->sp == -1;
3820 }
3821
3822 static void typestack_push ( const CUConst* cc,   /* not referenced directly in this body */
3823                              D3TypeParser* parser,   /* holds sp, stack_size, qparentE[] and qlevel[] */
3824                              Bool td3,   /* trace flag; NOTE(review): TD3/TRACE_D3 are macros defined elsewhere, presumably testing it */
3825                              const TyEnt* parentE, Int level )   /* entry to copy onto the stack, and the level to record with it */
3826 {  /* Record a copy of '*parentE' at 'level' as the new top of parser's stack. */
3827    if (0)   /* this trace is switched off */
3828    TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
3829             parser->sp+1, level, parentE->cuOff);
3830
3831    /* First we need to zap everything >= 'level', as we are about to
3832       replace any previous entry at 'level', so .. */
3833    typestack_preen(parser, /*td3*/False, level-1);
3834
3835    vg_assert(parser->sp >= -1);
3836    vg_assert(parser->sp < parser->stack_size);
3837    if (parser->sp == parser->stack_size - 1) {   /* no free slot above sp: make room for 16 more entries */
3838       parser->stack_size += 16;
3839       parser->qparentE =
3840          ML_(dinfo_realloc)("di.readdwarf3.typush.1", parser->qparentE,
3841                             parser->stack_size * sizeof parser->qparentE[0]);
3842       parser->qlevel =
3843          ML_(dinfo_realloc)("di.readdwarf3.typush.2", parser->qlevel,
3844                             parser->stack_size * sizeof parser->qlevel[0]);
3845    }
3846    if (parser->sp >= 0)   /* after the preen, any remaining top entry must be at a lower level */
3847       vg_assert(parser->qlevel[parser->sp] < level);
3848    parser->sp++;   /* sp now indexes the slot being filled */
3849    vg_assert(parentE);
3850    vg_assert(ML_(TyEnt__is_type)(parentE));
3851    vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
3852    parser->qparentE[parser->sp] = *parentE;   /* stored by value, not by pointer */
3853    parser->qlevel[parser->sp]  = level;
3854    if (TD3)
3855       typestack_show( parser, "after push" );
3856 }
3857
3858 /* True if the DW_TAG_subrange_type DIE being parsed gives array bounds. */
3859 static Bool subrange_type_denotes_array_bounds ( const D3TypeParser* parser,
3860                                                  DW_TAG dtag ) {
3861    vg_assert(dtag == DW_TAG_subrange_type);
3862    switch (parser->language) {
3863       case '?':
3864          /* Language not known: assume it is not an array bound. */
3865          return False;
3866       case 'A':
3867          /* Ada also uses subrange types for other purposes, so accept only
3868             one whose innermost noted parent is an array type. */
3869          if (typestack_is_empty(parser))
3870             return False;
3871          return parser->qparentE[parser->sp].tag == Te_TyArray;
3872       default:
3873          /* Every other language: it always denotes an array bound. */
3874          return True;
3875    }
3876 }
3877
3878 /* Says whether 'form' is acceptable as the encoding of an array bound.
3879    The ref1/ref2/ref4/ref8 forms are refused: for some arrays (scope local
3880    arrays with variable size) a DW_FORM_ref4 was used, and its value was
3881    wrongly taken as the bound itself. */
3882 static Bool form_expected_for_bound ( DW_FORM form ) {
3883    switch (form) {
3884       case DW_FORM_ref1: case DW_FORM_ref2:
3885       case DW_FORM_ref4: case DW_FORM_ref8:
3886          return False;   /* known to give a problem as a bound value */
3887       default:
3888          return True;
3889    }
3890 }
3891
3892 /* Parse a type-related DIE.  'parser' holds the current parser state.
3893    'admin' is where the completed types are dumped.  'dtag' is the tag
3894    for this DIE.  'c_die' points to the start of the data fields (FORM
3895    stuff) for the DIE.  abbv is the parsed abbreviation which describes
3896    the DIE.
3897
3898    We may find the DIE uninteresting, in which case we should ignore
3899    it.
3900
3901    What happens: the DIE is examined.  If uninteresting, it is ignored.
3902    Otherwise, the DIE gives rise to two things:
3903
3904    (1) the offset of this DIE in the CU -- the cuOffset, a UWord
3905    (2) a TyAdmin structure, which holds the type, or related stuff
3906
3907    (2) is added at the end of 'tyadmins', at some index, say 'i'.
3908
3909    A pair (cuOffset, i) is added to 'tydict'.
3910
3911    Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
3912    a mapping from cuOffset to the index of the corresponding entry in
3913    'tyadmins'.
3914
3915    When resolving a cuOffset to a TyAdmin, first look up the cuOffset
3916    in the tydict (by binary search).  This gives an index into
3917    tyadmins, and the required entity lives in tyadmins at that index.
3918 */
3919 __attribute__((noinline))
3920 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
3921                              /*MOD*/D3TypeParser* parser,
3922                              DW_TAG dtag,
3923                              UWord posn,
3924                              Int level,
3925                              Cursor* c_die,
3926                              const g_abbv *abbv,
3927                              CUConst* cc,
3928                              Bool td3 )
3929 {
3930    FormContents cts;
3931    UInt nf_i;
3932    TyEnt typeE;
3933    TyEnt atomE;
3934    TyEnt fieldE;
3935    TyEnt boundE;
3936
3937    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
3938
3939    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
3940    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
3941    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
3942    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
3943
3944    /* If we've returned to a level at or above any previously noted
3945       parent, un-note it, so we don't believe we're still collecting
3946       its children. */
3947    typestack_preen( parser, td3, level-1 );
3948
3949    if (dtag == DW_TAG_compile_unit
3950        || dtag == DW_TAG_type_unit
3951        || dtag == DW_TAG_partial_unit
3952        || dtag == DW_TAG_skeleton_unit) {
3953       if (level == 0)
3954          setup_cu_bases(cc, c_die, abbv);
3955       /* See if we can find DW_AT_language, since it is important for
3956          establishing array bounds (see DW_TAG_subrange_type below in
3957          this fn) */
3958       nf_i = 0;
3959       while (True) {
3960          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3961          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3962          const name_form *nf = &abbv->nf[nf_i];
3963          nf_i++;
3964          if (attr == 0 && form == 0) break;
3965          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3966          if (attr != DW_AT_language)
3967             continue;
3968          if (cts.szB <= 0)
3969            goto_bad_DIE;
3970          switch (cts.u.val) {
3971             case DW_LANG_C89: case DW_LANG_C:
3972             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
3973             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
3974             case DW_LANG_Upc: case DW_LANG_C99: case DW_LANG_C11:
3975             case DW_LANG_C17: case DW_LANG_C23:
3976             case DW_LANG_C_plus_plus_11: case DW_LANG_C_plus_plus_14:
3977             case DW_LANG_C_plus_plus_17: case DW_LANG_C_plus_plus_20:
3978             case DW_LANG_C_plus_plus_23:
3979                parser->language = 'C'; break;
3980             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
3981             case DW_LANG_Fortran95: case DW_LANG_Fortran03:
3982             case DW_LANG_Fortran08: case DW_LANG_Fortran18:
3983             case DW_LANG_Fortran23:
3984                parser->language = 'F'; break;
3985             case DW_LANG_Ada83: case DW_LANG_Ada95:
3986             case DW_LANG_Ada2005: case DW_LANG_Ada2012:
3987                parser->language = 'A'; break;
3988             case DW_LANG_Cobol74:
3989             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
3990             case DW_LANG_Modula2: case DW_LANG_Java:
3991             case DW_LANG_PLI:
3992             case DW_LANG_D: case DW_LANG_Python:
3993             case DW_LANG_OpenCL: case DW_LANG_Go:
3994             case DW_LANG_Modula3: case DW_LANG_Haskell:
3995             case DW_LANG_OCaml: case DW_LANG_Rust: case DW_LANG_Swift:
3996             case DW_LANG_Julia: case DW_LANG_Dylan:
3997             case DW_LANG_RenderScript: case DW_LANG_BLISS:
3998             case DW_LANG_Kotlin: case DW_LANG_Zig:
3999             case DW_LANG_Crystal: case DW_LANG_HIP:
4000             case DW_LANG_Assembly: case DW_LANG_C_sharp:
4001             case DW_LANG_Mojo: case DW_LANG_GLSL:
4002             case DW_LANG_GLSL_ES: case DW_LANG_HLSL:
4003             case DW_LANG_OpenCL_CPP: case DW_LANG_CPP_for_OpenCL:
4004             case DW_LANG_SYCL:
4005             case DW_LANG_Odin:
4006             case DW_LANG_P4:
4007             case DW_LANG_Metal:
4008             case DW_LANG_Ruby:
4009             case DW_LANG_Move:
4010             case DW_LANG_Hylo:
4011             case DW_LANG_Mips_Assembler:
4012                parser->language = '?'; break;
4013             default:
4014                goto_bad_DIE;
4015          }
4016       }
4017    }
4018
4019    if (dtag == DW_TAG_base_type) {
4020       /* We can pick up a new base type any time. */
4021       VG_(memset)(&typeE, 0, sizeof(typeE));
4022       typeE.cuOff = D3_INVALID_CUOFF;
4023       typeE.tag   = Te_TyBase;
4024       nf_i = 0;
4025       while (True) {
4026          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4027          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4028          const name_form *nf = &abbv->nf[nf_i];
4029          nf_i++;
4030          if (attr == 0 && form == 0) break;
4031          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4032          if (attr == DW_AT_name && cts.szB < 0) {
4033             typeE.Te.TyBase.name
4034                = ML_(cur_read_strdup)( cts.u.cur,
4035                                        "di.readdwarf3.ptD.base_type.1" );
4036          }
4037          if (attr == DW_AT_byte_size && cts.szB > 0) {
4038             typeE.Te.TyBase.szB = cts.u.val;
4039          }
4040          if (attr == DW_AT_encoding && cts.szB > 0) {
4041             switch (cts.u.val) {
4042                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
4043                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
4044                case DW_ATE_boolean:/* FIXME - is this correct? */
4045                case DW_ATE_unsigned_fixed:
4046                   typeE.Te.TyBase.enc = 'U'; break;
4047                case DW_ATE_signed: case DW_ATE_signed_char:
4048                case DW_ATE_signed_fixed:
4049                   typeE.Te.TyBase.enc = 'S'; break;
4050                case DW_ATE_float:
4051                   typeE.Te.TyBase.enc = 'F'; break;
4052                case DW_ATE_complex_float:
4053                   typeE.Te.TyBase.enc = 'C'; break;
4054                default:
4055                   goto_bad_DIE;
4056             }
4057          }
4058       }
4059
4060       /* Invent a name if it doesn't have one.  gcc-4.3
4061          -ftree-vectorize is observed to emit nameless base types. */
4062       if (!typeE.Te.TyBase.name)
4063          typeE.Te.TyBase.name
4064             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
4065                                  "<anon_base_type>" );
4066
4067       /* Do we have something that looks sane? */
4068       if (/* must have a name */
4069           typeE.Te.TyBase.name == NULL
4070           /* and a plausible size.  Yes, really 32: "complex long
4071              double" apparently has size=32 */
4072           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
4073           /* and a plausible encoding */
4074           || (typeE.Te.TyBase.enc != 'U'
4075               && typeE.Te.TyBase.enc != 'S'
4076               && typeE.Te.TyBase.enc != 'F'
4077               && typeE.Te.TyBase.enc != 'C'))
4078          goto_bad_DIE;
4079       /* Last minute hack: if we see this
4080          <1><515>: DW_TAG_base_type
4081              DW_AT_byte_size   : 0
4082              DW_AT_encoding    : 5
4083              DW_AT_name        : void
4084          convert it into a real Void type. */
4085       if (typeE.Te.TyBase.szB == 0
4086           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
4087          ML_(TyEnt__make_EMPTY)(&typeE);
4088          typeE.tag = Te_TyVoid;
4089          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
4090       }
4091
4092       goto acquire_Type;
4093    }
4094
4095    /*
4096     * An example of DW_TAG_rvalue_reference_type:
4097     *
4098     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
4099     *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
4100     *     <1015>   DW_AT_byte_size   : 4
4101     *     <1016>   DW_AT_type        : <0xe52>
4102     */
4103    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
4104        || dtag == DW_TAG_ptr_to_member_type
4105        || dtag == DW_TAG_rvalue_reference_type) {
4106       /* This seems legit for _pointer_type and _reference_type.  I
4107          don't know if rolling _ptr_to_member_type in here really is
4108          legit, but it's better than not handling it at all. */
4109       VG_(memset)(&typeE, 0, sizeof(typeE));
4110       typeE.cuOff = D3_INVALID_CUOFF;
4111       switch (dtag) {
4112       case DW_TAG_pointer_type:
4113          typeE.tag = Te_TyPtr;
4114          break;
4115       case DW_TAG_reference_type:
4116          typeE.tag = Te_TyRef;
4117          break;
4118       case DW_TAG_ptr_to_member_type:
4119          typeE.tag = Te_TyPtrMbr;
4120          break;
4121       case DW_TAG_rvalue_reference_type:
4122          typeE.tag = Te_TyRvalRef;
4123          break;
4124       default:
4125          vg_assert(False);
4126       }
4127       /* target type defaults to void */
4128       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
4129       /* These four type kinds don't *have* to specify their size, in
4130          which case we assume it's a machine word.  But if they do
4131          specify it, it must be a machine word :-)  This probably
4132          assumes that the word size of the Dwarf3 we're reading is the
4133          same size as that on the machine.  gcc appears to give a size
4134          whereas icc9 doesn't. */
4135       typeE.Te.TyPorR.szB = sizeof(UWord);
4136       nf_i = 0;
4137       while (True) {
4138          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4139          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4140          const name_form *nf = &abbv->nf[nf_i];
4141          nf_i++;
4142          if (attr == 0 && form == 0) break;
4143          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4144          if (attr == DW_AT_byte_size && cts.szB > 0) {
4145             typeE.Te.TyPorR.szB = cts.u.val;
4146          }
4147          if (attr == DW_AT_type && cts.szB > 0) {
4148             typeE.Te.TyPorR.typeR
4149                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4150          }
4151       }
4152       /* Do we have something that looks sane? */
4153       if (typeE.Te.TyPorR.szB != sizeof(UWord))
4154          goto_bad_DIE;
4155       else
4156          goto acquire_Type;
4157    }
4158
4159    if (dtag == DW_TAG_enumeration_type) {
4160       /* Create a new Type to hold the results. */
4161       VG_(memset)(&typeE, 0, sizeof(typeE));
4162       typeE.cuOff = posn;
4163       typeE.tag   = Te_TyEnum;
4164       Bool is_decl = False;
4165       typeE.Te.TyEnum.atomRs
4166          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
4167                        ML_(dinfo_free),
4168                        sizeof(UWord) );
4169       nf_i=0;
4170       while (True) {
4171          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4172          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4173          const name_form *nf = &abbv->nf[nf_i];
4174          nf_i++;
4175          if (attr == 0 && form == 0) break;
4176          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4177          if (attr == DW_AT_name && cts.szB < 0) {
4178             typeE.Te.TyEnum.name
4179                = ML_(cur_read_strdup)( cts.u.cur,
4180                                        "di.readdwarf3.pTD.enum_type.2" );
4181          }
4182          if (attr == DW_AT_byte_size && cts.szB > 0) {
4183             typeE.Te.TyEnum.szB = cts.u.val;
4184          }
4185          if (attr == DW_AT_declaration) {
4186             is_decl = True;
4187          }
4188       }
4189
4190       if (!typeE.Te.TyEnum.name)
4191          typeE.Te.TyEnum.name
4192             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
4193                                  "<anon_enum_type>" );
4194
4195       /* Do we have something that looks sane? */
4196       if (typeE.Te.TyEnum.szB == 0
4197           /* we must know the size */
4198           /* but not for Ada, which uses such dummy
4199              enumerations as helper for gdb ada mode.
4200              Also GCC allows incomplete enums as GNU extension.
4201              http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
4202              These are marked as DW_AT_declaration and won't have
4203              a size. They can only be used in declaration or as
4204              pointer types.  You can't allocate variables or storage
4205              using such an enum type. (Also GCC seems to have a bug
4206              that will put such an enumeration_type into a .debug_types
4207              unit which should only contain complete types.) */
4208           && (parser->language != 'A' && !is_decl)) {
4209          goto_bad_DIE;
4210       }
4211
4212       /* On't stack! */
4213       typestack_push( cc, parser, td3, &typeE, level );
4214       goto acquire_Type;
4215    }
4216
4217    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
4218       DW_TAG_enumerator with only a DW_AT_name but no
4219       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
4220       and appears to be a new "feature" of gcc - versions 4.3.x and
4221       earlier do not appear to do this.  So accept DW_TAG_enumerator
4222       which only have a name but no value.  An example:
4223
4224       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
4225          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
4226                                      QtMsgType
4227          <185>   DW_AT_byte_size   : 4
4228          <186>   DW_AT_decl_file   : 14
4229          <187>   DW_AT_decl_line   : 1480
4230          <189>   DW_AT_sibling     : <0x1a7>
4231       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
4232          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
4233                                      QtDebugMsg
4234       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
4235          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
4236                                      QtWarningMsg
4237       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
4238          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
4239                                      QtCriticalMsg
4240       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
4241          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
4242                                      QtFatalMsg
4243       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
4244          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
4245                                      QtSystemMsg
4246    */
4247    if (dtag == DW_TAG_enumerator) {
4248       VG_(memset)( &atomE, 0, sizeof(atomE) );
4249       atomE.cuOff = posn;
4250       atomE.tag   = Te_Atom;
4251       nf_i = 0;
4252       while (True) {
4253          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4254          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4255          const name_form *nf = &abbv->nf[nf_i];
4256          nf_i++;
4257          if (attr == 0 && form == 0) break;
4258          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4259          if (attr == DW_AT_name && cts.szB < 0) {
4260             atomE.Te.Atom.name
4261               = ML_(cur_read_strdup)( cts.u.cur,
4262                                       "di.readdwarf3.pTD.enumerator.1" );
4263          }
4264          if (attr == DW_AT_const_value && cts.szB > 0) {
4265             atomE.Te.Atom.value      = cts.u.val;
4266             atomE.Te.Atom.valueKnown = True;
4267          }
4268       }
4269       /* Do we have something that looks sane? */
4270       if (atomE.Te.Atom.name == NULL)
4271          goto_bad_DIE;
4272       /* Do we have a plausible parent? */
4273       if (typestack_is_empty(parser)) goto_bad_DIE;
4274       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
4275       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
4276       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
4277       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
4278       /* Record this child in the parent */
4279       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
4280       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
4281                     &atomE );
4282       /* And record the child itself */
4283       goto acquire_Atom;
4284    }
4285
4286    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
4287       don't know if this is correct, but it at least makes this reader
4288       usable for gcc-4.3 produced Dwarf3. */
4289    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
4290        || dtag == DW_TAG_union_type) {
4291       Bool have_szB = False;
4292       Bool is_decl  = False;
4293       Bool is_spec  = False;
4294       /* Create a new Type to hold the results. */
4295       VG_(memset)(&typeE, 0, sizeof(typeE));
4296       typeE.cuOff = posn;
4297       typeE.tag   = Te_TyStOrUn;
4298       typeE.Te.TyStOrUn.name = NULL;
4299       typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
4300       typeE.Te.TyStOrUn.fieldRs
4301          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
4302                        ML_(dinfo_free),
4303                        sizeof(UWord) );
4304       typeE.Te.TyStOrUn.complete = True;
4305       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
4306                                    || dtag == DW_TAG_class_type;
4307       nf_i = 0;
4308       while (True) {
4309          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4310          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4311          const name_form *nf = &abbv->nf[nf_i];
4312          nf_i++;
4313          if (attr == 0 && form == 0) break;
4314          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4315          if (attr == DW_AT_name && cts.szB < 0) {
4316             typeE.Te.TyStOrUn.name
4317                = ML_(cur_read_strdup)( cts.u.cur,
4318                                        "di.readdwarf3.ptD.struct_type.2" );
4319          }
4320          if (attr == DW_AT_byte_size && cts.szB >= 0) {
4321             typeE.Te.TyStOrUn.szB = cts.u.val;
4322             have_szB = True;
4323          }
4324          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
4325             is_decl = True;
4326          }
4327          if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
4328             is_spec = True;
4329          }
4330          if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
4331              && cts.szB > 0) {
4332             have_szB = True;
4333             typeE.Te.TyStOrUn.szB = 8;
4334             typeE.Te.TyStOrUn.typeR
4335                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4336          }
4337       }
4338       /* Do we have something that looks sane? */
4339       if (is_decl && (!is_spec)) {
4340          /* It's a DW_AT_declaration.  We require the name but
4341             nothing else. */
4342          /* JRS 2012-06-28: following discussion w/ tromey, if the
4343             type doesn't have name, just make one up, and accept it.
4344             It might be referred to by other DIEs, so ignoring it
4345             doesn't seem like a safe option. */
4346          if (typeE.Te.TyStOrUn.name == NULL)
4347             typeE.Te.TyStOrUn.name
4348                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
4349                                     "<anon_struct_type>" );
4350          typeE.Te.TyStOrUn.complete = False;
4351          /* JRS 2009 Aug 10: <possible kludge>? */
4352          /* Push this tyent on the stack, even though it's incomplete.
4353             It appears that gcc-4.4 on Fedora 11 will sometimes create
4354             DW_TAG_member entries for it, and so we need to have a
4355             plausible parent present in order for that to work.  See
4356             #200029 comments 8 and 9. */
4357          typestack_push( cc, parser, td3, &typeE, level );
4358          /* </possible kludge> */
4359          goto acquire_Type;
4360       }
4361       if ((!is_decl) /* && (!is_spec) */) {
4362          /* this is the common, ordinary case */
4363          /* The name can be present, or not */
4364          if (!have_szB) {
4365             /* We must know the size.
4366                But in Ada, record with discriminants might have no size.
4367                But in C, VLA in the middle of a struct (gcc extension)
4368                might have no size.
4369                Instead, some GNAT dwarf extensions and/or dwarf entries
4370                allow to calculate the struct size at runtime.
4371                We cannot do that (yet?) so, the temporary kludge is to use
4372                a small size. */
4373             typeE.Te.TyStOrUn.szB = 1;
4374          }
4375          /* On't stack! */
4376          typestack_push( cc, parser, td3, &typeE, level );
4377          goto acquire_Type;
4378       }
4379       else {
4380          /* don't know how to handle any other variants just now */
4381          goto_bad_DIE;
4382       }
4383    }
4384
4385    if (dtag == DW_TAG_member) {
4386       /* Acquire member entries for both DW_TAG_structure_type and
4387          DW_TAG_union_type.  They differ minorly, in that struct
4388          members must have a DW_AT_data_member_location expression
4389          whereas union members must not. */
4390       Bool parent_is_struct;
4391       Bool is_artificial = False;
4392       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
4393       fieldE.cuOff = posn;
4394       fieldE.tag   = Te_Field;
4395       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
4396       nf_i = 0;
4397       while (True) {
4398          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4399          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4400          const name_form *nf = &abbv->nf[nf_i];
4401          nf_i++;
4402          if (attr == 0 && form == 0) break;
4403          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4404          if (attr == DW_AT_name && cts.szB < 0) {
4405             fieldE.Te.Field.name
4406                = ML_(cur_read_strdup)( cts.u.cur,
4407                                        "di.readdwarf3.ptD.member.1" );
4408          }
4409          if (attr == DW_AT_type && cts.szB > 0) {
4410             fieldE.Te.Field.typeR
4411                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4412          }
4413          /* There are 2 different cases for DW_AT_data_member_location.
4414             If it is a constant class attribute, it contains byte offset
4415             from the beginning of the containing entity.
4416             Otherwise it is a location expression.  */
4417          if (attr == DW_AT_data_member_location && cts.szB > 0) {
4418             fieldE.Te.Field.nLoc = -1;
4419             fieldE.Te.Field.pos.offset = cts.u.val;
4420          }
4421          if (attr == DW_AT_data_member_location && cts.szB <= 0) {
4422             fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
4423             fieldE.Te.Field.pos.loc
4424                = ML_(cur_read_memdup)( cts.u.cur,
4425                                        (SizeT)fieldE.Te.Field.nLoc,
4426                                        "di.readdwarf3.ptD.member.2" );
4427          }
4428          if (attr == DW_AT_artificial && cts.u.val == 1)
4429             is_artificial = True;
4430       }
4431       /* Skip artificial members, they might not behave as expected.  */
4432       if (is_artificial)
4433          goto no_location;
4434       /* Do we have a plausible parent? */
4435       if (typestack_is_empty(parser)) goto_bad_DIE;
4436       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
4437       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
4438       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
4439       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
4440       /* Do we have something that looks sane?  If this a member of a
4441          struct, we must have a location expression; but if a member
4442          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
4443          to reject in the latter case, but some compilers have been
4444          observed to emit constant-zero expressions.  So just ignore
4445          them. */
4446       parent_is_struct
4447          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
4448       if (!fieldE.Te.Field.name)
4449          fieldE.Te.Field.name
4450             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
4451                                  "<anon_field>" );
4452       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
4453          goto_bad_DIE;
4454       if (fieldE.Te.Field.nLoc) {
4455          if (!parent_is_struct) {
4456             /* If this is a union type, pretend we haven't seen the data
4457                member location expression, as it is by definition
4458                redundant (it must be zero). */
4459             if (fieldE.Te.Field.nLoc > 0)
4460                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
4461             fieldE.Te.Field.pos.loc = NULL;
4462             fieldE.Te.Field.nLoc = 0;
4463          }
4464          /* Record this child in the parent */
4465          fieldE.Te.Field.isStruct = parent_is_struct;
4466          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
4467          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
4468                        &posn );
4469          /* And record the child itself */
4470          goto acquire_Field;
4471       } else {
4472          /* Member with no location - this can happen with static
4473             const members in C++ code which are compile time constants
4474             that do no exist in the class. They're not of any interest
4475             to us so we ignore them. */
4476         no_location:
4477          ML_(TyEnt__make_EMPTY)(&fieldE);
4478       }
4479    }
4480
4481    if (dtag == DW_TAG_array_type) {
4482       VG_(memset)(&typeE, 0, sizeof(typeE));
4483       typeE.cuOff = posn;
4484       typeE.tag   = Te_TyArray;
4485       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
4486       typeE.Te.TyArray.boundRs
4487          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
4488                        ML_(dinfo_free),
4489                        sizeof(UWord) );
4490       nf_i = 0;
4491       while (True) {
4492          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4493          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4494          const name_form *nf = &abbv->nf[nf_i];
4495          nf_i++;
4496          if (attr == 0 && form == 0) break;
4497          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4498          if (attr == DW_AT_type && cts.szB > 0) {
4499             typeE.Te.TyArray.typeR
4500                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4501          }
4502       }
4503       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
4504          goto_bad_DIE;
4505       /* On't stack! */
4506       typestack_push( cc, parser, td3, &typeE, level );
4507       goto acquire_Type;
4508    }
4509
4510    /* this is a subrange type defining the bounds of an array. */
4511    if (dtag == DW_TAG_subrange_type
4512        && subrange_type_denotes_array_bounds(parser, dtag)) {
4513       Bool have_lower = False;
4514       Bool have_upper = False;
4515       Bool have_count = False;
4516       Long lower = 0;
4517       Long upper = 0;
4518       Long count = 0;
4519
4520       switch (parser->language) {
4521          case 'C': have_lower = True;  lower = 0; break;
4522          case 'F': have_lower = True;  lower = 1; break;
4523          case '?': have_lower = False; break;
4524          case 'A': have_lower = False; break;
4525          default:  vg_assert(0); /* assured us by handling of
4526                                     DW_TAG_compile_unit in this fn */
4527       }
4528
4529       VG_(memset)( &boundE, 0, sizeof(boundE) );
4530       boundE.cuOff = D3_INVALID_CUOFF;
4531       boundE.tag   = Te_Bound;
4532       nf_i = 0;
4533       while (True) {
4534          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4535          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4536          const name_form *nf = &abbv->nf[nf_i];
4537          nf_i++;
4538          if (attr == 0 && form == 0) break;
4539          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4540          if (attr == DW_AT_lower_bound && cts.szB > 0
4541              && form_expected_for_bound (form)) {
4542             lower      = (Long)cts.u.val;
4543             have_lower = True;
4544          }
4545          if (attr == DW_AT_upper_bound && cts.szB > 0
4546              && form_expected_for_bound (form)) {
4547             upper      = (Long)cts.u.val;
4548             have_upper = True;
4549          }
4550          if (attr == DW_AT_count && cts.szB > 0) {
4551             count    = (Long)cts.u.val;
4552             have_count = True;
4553          }
4554       }
4555       /* FIXME: potentially skip the rest if no parent present, since
4556          it could be the case that this subrange type is free-standing
4557          (not being used to describe the bounds of a containing array
4558          type) */
4559       /* Do we have a plausible parent? */
4560       if (typestack_is_empty(parser)) goto_bad_DIE;
4561       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
4562       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
4563       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
4564       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
4565
4566       /* Figure out if we have a definite range or not */
4567       if (have_lower && have_upper && (!have_count)) {
4568          boundE.Te.Bound.knownL = True;
4569          boundE.Te.Bound.knownU = True;
4570          boundE.Te.Bound.boundL = lower;
4571          boundE.Te.Bound.boundU = upper;
4572       }
4573       else if (have_lower && (!have_upper) && (!have_count)) {
4574          boundE.Te.Bound.knownL = True;
4575          boundE.Te.Bound.knownU = False;
4576          boundE.Te.Bound.boundL = lower;
4577          boundE.Te.Bound.boundU = 0;
4578       }
4579       else if ((!have_lower) && have_upper && (!have_count)) {
4580          boundE.Te.Bound.knownL = False;
4581          boundE.Te.Bound.knownU = True;
4582          boundE.Te.Bound.boundL = 0;
4583          boundE.Te.Bound.boundU = upper;
4584       }
4585       else if ((!have_lower) && (!have_upper) && (!have_count)) {
4586          boundE.Te.Bound.knownL = False;
4587          boundE.Te.Bound.knownU = False;
4588          boundE.Te.Bound.boundL = 0;
4589          boundE.Te.Bound.boundU = 0;
4590       } else if (have_lower && (!have_upper) && (have_count)) {
4591          boundE.Te.Bound.knownL = True;
4592          boundE.Te.Bound.knownU = True;
4593          boundE.Te.Bound.boundL = lower;
4594          boundE.Te.Bound.boundU = lower + count - 1;
4595       } else {
4596          /* FIXME: handle more cases */
4597          goto_bad_DIE;
4598       }
4599
4600       /* Record this bound in the parent */
4601       boundE.cuOff = posn;
4602       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
4603       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
4604                     &boundE.cuOff );
4605       /* And record the child itself */
4606       goto acquire_Bound;
4607    }
4608
4609    /* typedef or subrange_type other than array bounds. */
4610    if (dtag == DW_TAG_typedef
4611        || (dtag == DW_TAG_subrange_type
4612            && !subrange_type_denotes_array_bounds(parser, dtag))) {
4613       /* subrange_type other than array bound is only for Ada. */
4614       vg_assert (dtag == DW_TAG_typedef || (parser->language == 'A'
4615                                             || parser->language == '?'));
4616       /* We can pick up a new typedef/subrange_type any time. */
4617       VG_(memset)(&typeE, 0, sizeof(typeE));
4618       typeE.cuOff = D3_INVALID_CUOFF;
4619       typeE.tag   = Te_TyTyDef;
4620       typeE.Te.TyTyDef.name = NULL;
4621       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
4622       nf_i = 0;
4623       while (True) {
4624          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4625          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4626          const name_form *nf = &abbv->nf[nf_i];
4627          nf_i++;
4628          if (attr == 0 && form == 0) break;
4629          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4630          if (attr == DW_AT_name && cts.szB < 0) {
4631             typeE.Te.TyTyDef.name
4632                = ML_(cur_read_strdup)( cts.u.cur,
4633                                        "di.readdwarf3.ptD.typedef.1" );
4634          }
4635          if (attr == DW_AT_type && cts.szB > 0) {
4636             typeE.Te.TyTyDef.typeR
4637                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4638          }
4639       }
4640       /* Do we have something that looks sane?
4641          gcc gnat Ada generates minimal typedef
4642          such as the below
4643          <6><91cc>: DW_TAG_typedef
4644             DW_AT_abstract_ori: <9066>
4645          g++ for OMP can generate artificial functions that have
4646          parameters that refer to pointers to unnamed typedefs.
4647          See https://bugs.kde.org/show_bug.cgi?id=273475
4648          So we cannot require a name for a DW_TAG_typedef.
4649       */
4650       goto acquire_Type;
4651    }
4652
4653    if (dtag == DW_TAG_subroutine_type) {
4654       /* function type? just record that one fact and ask no
4655          further questions. */
4656       VG_(memset)(&typeE, 0, sizeof(typeE));
4657       typeE.cuOff = D3_INVALID_CUOFF;
4658       typeE.tag   = Te_TyFn;
4659       goto acquire_Type;
4660    }
4661
4662    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type
4663        || dtag == DW_TAG_restrict_type || dtag == DW_TAG_atomic_type) {
4664       Int have_ty = 0;
4665       VG_(memset)(&typeE, 0, sizeof(typeE));
4666       typeE.cuOff = D3_INVALID_CUOFF;
4667       typeE.tag   = Te_TyQual;
4668       typeE.Te.TyQual.qual
4669          = (dtag == DW_TAG_volatile_type ? 'V'
4670             : (dtag == DW_TAG_const_type ? 'C'
4671                : (dtag == DW_TAG_restrict_type ? 'R' : 'A')));
4672       /* target type defaults to 'void' */
4673       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
4674       nf_i = 0;
4675       while (True) {
4676          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4677          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4678          const name_form *nf = &abbv->nf[nf_i];
4679          nf_i++;
4680          if (attr == 0 && form == 0) break;
4681          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4682          if (attr == DW_AT_type && cts.szB > 0) {
4683             typeE.Te.TyQual.typeR
4684                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4685             have_ty++;
4686          }
4687       }
4688       /* gcc sometimes generates DW_TAG_const/volatile_type without
4689          DW_AT_type and GDB appears to interpret the type as 'const
4690          void' (resp. 'volatile void').  So just allow it .. */
4691       if (have_ty == 1 || have_ty == 0)
4692          goto acquire_Type;
4693       else
4694          goto_bad_DIE;
4695    }
4696
4697    /*
4698     * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
4699     *
4700     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
4701     *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
4702     *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
4703     */
4704    if (dtag == DW_TAG_unspecified_type) {
4705       VG_(memset)(&typeE, 0, sizeof(typeE));
4706       typeE.cuOff           = D3_INVALID_CUOFF;
4707       typeE.tag             = Te_TyQual;
4708       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
4709       goto acquire_Type;
4710    }
4711
4712    /* else ignore this DIE */
4713    return;
4714    /*NOTREACHED*/
4715
4716   acquire_Type:
4717    if (0) VG_(printf)("YYYY Acquire Type\n");
4718    vg_assert(ML_(TyEnt__is_type)( &typeE ));
4719    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
4720    typeE.cuOff = posn;
4721    VG_(addToXA)( tyents, &typeE );
4722    return;
4723    /*NOTREACHED*/
4724
4725   acquire_Atom:
4726    if (0) VG_(printf)("YYYY Acquire Atom\n");
4727    vg_assert(atomE.tag == Te_Atom);
4728    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
4729    atomE.cuOff = posn;
4730    VG_(addToXA)( tyents, &atomE );
4731    return;
4732    /*NOTREACHED*/
4733
4734   acquire_Field:
4735    /* For union members, Expr should be absent */
4736    if (0) VG_(printf)("YYYY Acquire Field\n");
4737    vg_assert(fieldE.tag == Te_Field);
4738    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
4739    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
4740    if (fieldE.Te.Field.isStruct) {
4741       vg_assert(fieldE.Te.Field.nLoc != 0);
4742    } else {
4743       vg_assert(fieldE.Te.Field.nLoc == 0);
4744    }
4745    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
4746    fieldE.cuOff = posn;
4747    VG_(addToXA)( tyents, &fieldE );
4748    return;
4749    /*NOTREACHED*/
4750
4751   acquire_Bound:
4752    if (0) VG_(printf)("YYYY Acquire Bound\n");
4753    vg_assert(boundE.tag == Te_Bound);
4754    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
4755    boundE.cuOff = posn;
4756    VG_(addToXA)( tyents, &boundE );
4757    return;
4758    /*NOTREACHED*/
4759
4760   bad_DIE:
4761    dump_bad_die_and_barf("parse_type_DIE", dtag, posn, level,
4762                          c_die, saved_die_c_offset,
4763                          abbv,
4764                          cc);
4765    /*NOTREACHED*/
4766 }
4767
4768
4769 /*------------------------------------------------------------*/
4770 /*---                                                      ---*/
4771 /*--- Compression of type DIE information                  ---*/
4772 /*---                                                      ---*/
4773 /*------------------------------------------------------------*/
4774
4775 static UWord chase_cuOff ( Bool* changed,
4776                            const XArray* /* of TyEnt */ ents,
4777                            TyEntIndexCache* ents_cache,
4778                            UWord cuOff )
4779 {
4780    TyEnt* ent;
4781    ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
4782
4783    if (!ent) {
4784       if (VG_(clo_verbosity) > 1)
4785          VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
4786       *changed = False;
4787       return cuOff;
4788    }
4789
4790    vg_assert(ent->tag != Te_EMPTY);
4791    if (ent->tag != Te_INDIR) {
4792       *changed = False;
4793       return cuOff;
4794    } else {
4795       vg_assert(ent->Te.INDIR.indR < cuOff);
4796       *changed = True;
4797       return ent->Te.INDIR.indR;
4798    }
4799 }
4800
4801 static
4802 void chase_cuOffs_in_XArray ( Bool* changed,
4803                               const XArray* /* of TyEnt */ ents,
4804                               TyEntIndexCache* ents_cache,
4805                               /*MOD*/XArray* /* of UWord */ cuOffs )
4806 {
4807    Bool b2 = False;
4808    Word i, n = VG_(sizeXA)( cuOffs );
4809    for (i = 0; i < n; i++) {
4810       Bool   b = False;
4811       UWord* p = VG_(indexXA)( cuOffs, i );
4812       *p = chase_cuOff( &b, ents, ents_cache, *p );
4813       if (b)
4814          b2 = True;
4815    }
4816    *changed = b2;
4817 }
4818
4819 static Bool TyEnt__subst_R_fields ( const XArray* /* of TyEnt */ ents,
4820                                     TyEntIndexCache* ents_cache,
4821                                     /*MOD*/TyEnt* te )
4822 {
4823    Bool b, changed = False;
4824    switch (te->tag) {
4825       case Te_EMPTY:
4826          break;
4827       case Te_INDIR:
4828          te->Te.INDIR.indR
4829             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
4830          if (b) changed = True;
4831          break;
4832       case Te_UNKNOWN:
4833          break;
4834       case Te_Atom:
4835          break;
4836       case Te_Field:
4837          te->Te.Field.typeR
4838             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
4839          if (b) changed = True;
4840          break;
4841       case Te_Bound:
4842          break;
4843       case Te_TyBase:
4844          break;
4845       case Te_TyPtr:
4846       case Te_TyRef:
4847       case Te_TyPtrMbr:
4848       case Te_TyRvalRef:
4849          te->Te.TyPorR.typeR
4850             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
4851          if (b) changed = True;
4852          break;
4853       case Te_TyTyDef:
4854          te->Te.TyTyDef.typeR
4855             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
4856          if (b) changed = True;
4857          break;
4858       case Te_TyStOrUn:
4859          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
4860          if (b) changed = True;
4861          break;
4862       case Te_TyEnum:
4863          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
4864          if (b) changed = True;
4865          break;
4866       case Te_TyArray:
4867          te->Te.TyArray.typeR
4868             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
4869          if (b) changed = True;
4870          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
4871          if (b) changed = True;
4872          break;
4873       case Te_TyFn:
4874          break;
4875       case Te_TyQual:
4876          te->Te.TyQual.typeR
4877             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
4878          if (b) changed = True;
4879          break;
4880       case Te_TyVoid:
4881          break;
4882       default:
4883          ML_(pp_TyEnt)(te);
4884          vg_assert(0);
4885    }
4886    return changed;
4887 }
4888
4889 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
4890    'R' or 'Rs' fields (those which refer to other tyents), and replace
4891    any which point to INDIR nodes with the target of the indirection
4892    (which should not itself be an indirection).  In summary, this
4893    routine shorts out all references to indirection nodes. */
4894 static
4895 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
4896                                      TyEntIndexCache* ents_cache )
4897 {
4898    Word i, n, nChanged = 0;
4899    Bool b;
4900    n = VG_(sizeXA)( ents );
4901    for (i = 0; i < n; i++) {
4902       TyEnt* ent = VG_(indexXA)( ents, i );
4903       vg_assert(ent->tag != Te_EMPTY);
4904       /* We have to substitute everything, even indirections, so as to
4905          ensure that chains of indirections don't build up. */
4906       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
4907       if (b)
4908          nChanged++;
4909    }
4910
4911    return nChanged;
4912 }
4913
4914
4915 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
4916    Look up each new tyent in the dictionary in turn.  If it is already
4917    in the dictionary, replace this tyent with an indirection to the
4918    existing one, and delete any malloc'd stuff hanging off this one.
4919    In summary, this routine commons up all tyents that are identical
4920    as defined by TyEnt__cmp_by_all_except_cuOff. */
4921 static
4922 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
4923 {
4924    Word    n, i, nDeleted;
4925    WordFM* dict; /* TyEnt* -> void */
4926    TyEnt*  ent;
4927    UWord   keyW, valW;
4928
4929    dict = VG_(newFM)(
4930              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
4931              ML_(dinfo_free),
4932              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
4933           );
4934
4935    nDeleted = 0;
4936    n = VG_(sizeXA)( ents );
4937    for (i = 0; i < n; i++) {
4938       ent = VG_(indexXA)( ents, i );
4939       vg_assert(ent->tag != Te_EMPTY);
4940
4941       /* Ignore indirections, although check that they are
4942          not forming a cycle. */
4943       if (ent->tag == Te_INDIR) {
4944          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
4945          continue;
4946       }
4947
4948       keyW = valW = 0;
4949       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
4950          /* it's already in the dictionary. */
4951          TyEnt* old = (TyEnt*)keyW;
4952          vg_assert(valW == 0);
4953          vg_assert(old != ent);
4954          vg_assert(old->tag != Te_INDIR);
4955          /* since we are traversing the array in increasing order of
4956             cuOff: */
4957          vg_assert(old->cuOff < ent->cuOff);
4958          /* So anyway, dump this entry and replace it with an
4959             indirection to the one in the dictionary.  Note that the
4960             assertion above guarantees that we cannot create cycles of
4961             indirections, since we are always creating an indirection
4962             to a tyent with a cuOff lower than this one. */
4963          ML_(TyEnt__make_EMPTY)( ent );
4964          ent->tag = Te_INDIR;
4965          ent->Te.INDIR.indR = old->cuOff;
4966          nDeleted++;
4967       } else {
4968          /* not in dictionary; add it and keep going. */
4969          VG_(addToFM)( dict, (UWord)ent, 0 );
4970       }
4971    }
4972
4973    VG_(deleteFM)( dict, NULL, NULL );
4974
4975    return nDeleted;
4976 }
4977
4978
4979 static
4980 void dedup_types ( Bool td3,
4981                    /*MOD*/XArray* /* of TyEnt */ ents,
4982                    TyEntIndexCache* ents_cache )
4983 {
4984    Word m, n, i, nDel, nSubst, nThresh;
4985    if (0) td3 = True;
4986
4987    n = VG_(sizeXA)( ents );
4988
4989    /* If a commoning pass and a substitution pass both make fewer than
4990       this many changes, just stop.  It's pointless to burn up CPU
4991       time trying to compress the last 1% or so out of the array. */
4992    nThresh = n / 200;
4993
4994    /* First we must sort .ents by its .cuOff fields, so we
4995       can index into it. */
4996    VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4997    VG_(sortXA)( ents );
4998
4999    /* Now repeatedly do commoning and substitution passes over
5000       the array, until there are no more changes. */
5001    do {
5002       nDel   = dedup_types_commoning_pass ( ents );
5003       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
5004       vg_assert(nDel >= 0 && nSubst >= 0);
5005       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
5006    } while (nDel > nThresh || nSubst > nThresh);
5007
5008    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
5009       In fact this should be true at the end of every loop iteration
5010       above (a commoning pass followed by a substitution pass), but
5011       checking it on every iteration is excessively expensive.  Note,
5012       this loop also computes 'm' for the stats printing below it. */
5013    m = 0;
5014    n = VG_(sizeXA)( ents );
5015    for (i = 0; i < n; i++) {
5016       TyEnt *ent, *ind;
5017       ent = VG_(indexXA)( ents, i );
5018       if (ent->tag != Te_INDIR) continue;
5019       m++;
5020       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
5021                                          ent->Te.INDIR.indR );
5022       vg_assert(ind);
5023       vg_assert(ind->tag != Te_INDIR);
5024    }
5025
5026    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
5027 }
5028
5029
5030 /*------------------------------------------------------------*/
5031 /*---                                                      ---*/
5032 /*--- Resolution of references to type DIEs                ---*/
5033 /*---                                                      ---*/
5034 /*------------------------------------------------------------*/
5035
5036 /* Make a pass through the (temporary) variables array.  Examine the
5037    type of each variable, check is it found, and chase any Te_INDIRs.
5038    Postcondition is: each variable has a typeR field that refers to a
5039    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
5040    not to refer to a Te_INDIR.  (This is so that we can throw all the
5041    Te_INDIRs away later). */
5042
5043 __attribute__((noinline))
5044 static void resolve_variable_types (
5045                void (*barf)( const HChar* ) __attribute__((noreturn)),
5046                /*R-O*/XArray* /* of TyEnt */ ents,
5047                /*MOD*/TyEntIndexCache* ents_cache,
5048                /*MOD*/XArray* /* of TempVar* */ vars
5049             )
5050 {
5051    Word i, n;
5052    n = VG_(sizeXA)( vars );
5053    for (i = 0; i < n; i++) {
5054       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
5055       /* This is the stated type of the variable.  But it might be
5056          an indirection, so be careful. */
5057       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
5058                                                 var->typeR );
5059       if (ent && ent->tag == Te_INDIR) {
5060          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
5061                                             ent->Te.INDIR.indR );
5062          vg_assert(ent);
5063          vg_assert(ent->tag != Te_INDIR);
5064       }
5065
5066       /* Deal first with "normal" cases */
5067       if (ent && ML_(TyEnt__is_type)(ent)) {
5068          var->typeR = ent->cuOff;
5069          continue;
5070       }
5071
5072       /* If there's no ent, it probably we did not manage to read a
5073          type at the cuOffset which is stated as being this variable's
5074          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
5075       if (ent == NULL) {
5076          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
5077          barf("resolve_variable_types: "
5078               "cuOff does not refer to a known type");
5079       }
5080       vg_assert(ent);
5081       /* If ent has any other tag, something bad happened, along the
5082          lines of var->typeR not referring to a type at all. */
5083       vg_assert(ent->tag == Te_UNKNOWN);
5084       /* Just accept it; the type will be useless, but at least keep
5085          going. */
5086       var->typeR = ent->cuOff;
5087    }
5088 }
5089
5090
5091 /*------------------------------------------------------------*/
5092 /*---                                                      ---*/
5093 /*--- Parsing of Compilation Units                         ---*/
5094 /*---                                                      ---*/
5095 /*------------------------------------------------------------*/
5096
5097 static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
5098    const TempVar* t1 = *(const TempVar *const *)v1;
5099    const TempVar* t2 = *(const TempVar *const *)v2;
5100    if (t1->dioff < t2->dioff) return -1;
5101    if (t1->dioff > t2->dioff) return 1;
5102    return 0;
5103 }
5104
5105 static void read_DIE (
5106    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
5107    /*MOD*/XArray* /* of TyEnt */ tyents,
5108    /*MOD*/XArray* /* of TempVar* */ tempvars,
5109    /*MOD*/XArray* /* of GExpr* */ gexprs,
5110    /*MOD*/D3TypeParser* typarser,
5111    /*MOD*/D3VarParser* varparser,
5112    /*MOD*/D3InlParser* inlparser,
5113    XArray** fndn_ix_Table,
5114    ULong *debug_line_offset,
5115    Cursor* c, Bool td3, CUConst* cc, Int level
5116 )
5117 {
5118    const g_abbv *abbv;
5119    ULong  atag, abbv_code;
5120    UWord  posn;
5121    UInt   has_children;
5122    UWord  start_die_c_offset;
5123    UWord  after_die_c_offset;
5124    // If the DIE we will parse has a sibling and the parser(s) are
5125    // all indicating that parse_children is not necessary, then
5126    // we will skip the children by jumping to the sibling of this DIE
5127    // (if it has a sibling).
5128    UWord  sibling = 0;
5129    Bool   parse_children = False;
5130
5131    /* --- Deal with this DIE --- */
5132    posn      = cook_die( cc, get_position_of_Cursor( c ) );
5133    abbv_code = get_ULEB128( c );
5134    abbv = get_abbv(cc, abbv_code, td3);
5135    atag      = abbv->atag;
5136
5137    if (TD3) {
5138       TRACE_D3("\n");
5139       trace_DIE ((DW_TAG)atag, posn, level,
5140                  get_position_of_Cursor( c ), abbv, cc);
5141    }
5142
5143    if (atag == 0)
5144       cc->barf("read_DIE: invalid zero tag on DIE");
5145
5146    has_children = abbv->has_children;
5147    if (has_children != DW_children_no && has_children != DW_children_yes)
5148       cc->barf("read_DIE: invalid has_children value");
5149
5150    /* We're set up to look at the fields of this DIE.  Hand it off to
5151       any parser(s) that want to see it.  Since they will in general
5152       advance the DIE cursor, remember the current settings so that we
5153       can then back up. . */
5154    start_die_c_offset  = get_position_of_Cursor( c );
5155    after_die_c_offset  = 0; // set to c position if a parser has read the DIE.
5156
5157    if (VG_(clo_read_var_info)) {
5158       parse_type_DIE( tyents,
5159                       typarser,
5160                       (DW_TAG)atag,
5161                       posn,
5162                       level,
5163                       c,     /* DIE cursor */
5164                       abbv,  /* abbrev */
5165                       cc,
5166                       td3 );
5167       if (get_position_of_Cursor( c ) != start_die_c_offset) {
5168          after_die_c_offset = get_position_of_Cursor( c );
5169          set_position_of_Cursor( c, start_die_c_offset );
5170       }
5171
5172       parse_var_DIE( rangestree,
5173                      tempvars,
5174                      gexprs,
5175                      varparser,
5176                      fndn_ix_Table,
5177                      debug_line_offset,
5178                      (DW_TAG)atag,
5179                      posn,
5180                      level,
5181                      c,     /* DIE cursor */
5182                      abbv,  /* abbrev */
5183                      cc,
5184                      td3 );
5185       if (get_position_of_Cursor( c ) != start_die_c_offset) {
5186          after_die_c_offset = get_position_of_Cursor( c );
5187          set_position_of_Cursor( c, start_die_c_offset );
5188       }
5189
5190       parse_children = True;
5191       // type and var parsers do not have logic to skip childrens and establish
5192       // the value of sibling.
5193    }
5194
5195    if (VG_(clo_read_inline_info)) {
5196       inlparser->sibling = 0;
5197       parse_children =
5198          parse_inl_DIE( inlparser,
5199                         fndn_ix_Table,
5200                         debug_line_offset,
5201                         (DW_TAG)atag,
5202                         posn,
5203                         level,
5204                         c,     /* DIE cursor */
5205                         abbv, /* abbrev */
5206                         cc,
5207                         td3 )
5208          || parse_children;
5209       if (get_position_of_Cursor( c ) != start_die_c_offset) {
5210          after_die_c_offset = get_position_of_Cursor( c );
5211          // Last parser, no need to reset the cursor to start_die_c_offset.
5212       }
5213       if (sibling == 0)
5214          sibling = inlparser->sibling;
5215       vg_assert (inlparser->sibling == 0 || inlparser->sibling == sibling);
5216    }
5217
5218    /* Top level CU DIE, but we don't want to read anything else, just skip
5219       to the end and return.  */
5220    if (level == 0 && !parse_children) {
5221       UWord cu_size_including_IniLen = (cc->unit_length
5222                                         + (cc->is_dw64 ? 12 : 4));
5223       set_position_of_Cursor( c, (cc->cu_start_offset
5224                                   + cu_size_including_IniLen));
5225       return;
5226    }
5227
5228    if (after_die_c_offset > 0) {
5229       // DIE was read by a parser above, so we know where the DIE ends.
5230       set_position_of_Cursor( c, after_die_c_offset );
5231    } else {
5232       /* No parser has parsed this DIE. So, we need to skip the DIE,
5233          in order to read the next DIE.
5234          At the same time, establish sibling value if the DIE has one. */
5235       TRACE_D3("    uninteresting DIE -> skipping ...\n");
5236       skip_DIE (&sibling, c, abbv, cc);
5237    }
5238
5239    /* --- Now recurse into its children, if any
5240       and the parsing of the children is requested by a parser --- */
5241    if (has_children == DW_children_yes) {
5242       if (parse_children || sibling == 0) {
5243          if (0) TRACE_D3("BEGIN children of level %d\n", level);
5244          while (True) {
5245             atag = peek_ULEB128( c );
5246             if (atag == 0) break;
5247             if (parse_children) {
5248                read_DIE( rangestree, tyents, tempvars, gexprs,
5249                          typarser, varparser, inlparser,
5250                          fndn_ix_Table, debug_line_offset,
5251                          c, td3, cc, level+1 );
5252             } else {
5253                Int skip_level = level + 1;
5254                while (True) {
5255                   atag = peek_ULEB128( c );
5256                   if (atag == 0) {
5257                      skip_level--;
5258                      if (skip_level == level) break;
5259                      /* Eat the terminating zero and continue skipping the
5260                         children one level up.  */
5261                      atag = get_ULEB128( c );
5262                      vg_assert(atag == 0);
5263                      continue;
5264                   }
5265
5266                   abbv_code = get_ULEB128( c );
5267                   abbv = get_abbv(cc, abbv_code, td3);
5268                   sibling = 0;
5269                   skip_DIE (&sibling, c, abbv, cc);
5270                   if (abbv->has_children) {
5271                      if (sibling == 0)
5272                         skip_level++;
5273                      else
5274                         set_position_of_Cursor( c, sibling );
5275                   }
5276                }
5277             }
5278          }
5279          /* Now we need to eat the terminating zero */
5280          atag = get_ULEB128( c );
5281          vg_assert(atag == 0);
5282          if (0) TRACE_D3("END children of level %d\n", level);
5283       } else {
5284          // We can skip the childrens, by jumping to the sibling
5285          TRACE_D3("    SKIPPING DIE's children,"
5286                   "jumping to sibling <%d><%lx>\n",
5287                   level, sibling);
5288          set_position_of_Cursor( c, sibling );
5289       }
5290    }
5291
5292 }
5293
/* Intended to trace the contents of .debug_loc.  The entire body is
   inside "#if 0", so as it stands this function does nothing and
   DI, BARF and ESCN_DEBUG_LOC go unused.  The disabled code is kept
   for reference; the comment at its head records why it was turned
   off (it assumes entries are packed with no holes and loses sync
   when they are not). */
5294 static void trace_debug_loc (const DebugInfo* di,
5295                              __attribute__((noreturn)) void (*barf)( const HChar* ),
5296                              DiSlice escn_debug_loc)
5297 {
5298 #if 0
5299    /* This doesn't work properly because it assumes all entries are
5300       packed end to end, with no holes.  But that doesn't always
5301       appear to be the case, so it loses sync.  And the D3 spec
5302       doesn't appear to require a no-hole situation either. */
5303    /* Display .debug_loc */
5304    Addr  dl_base;
5305    UWord dl_offset;
5306    Cursor loc; /* for showing .debug_loc */
5307    Bool td3 = di->trace_symtab;
5308
5309    TRACE_SYMTAB("\n");
5310    TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
5311    TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
5312    if (ML_(sli_is_valid)(escn_debug_loc)) {
5313       init_Cursor( &loc, escn_debug_loc, 0, barf,
5314                    "Overrun whilst reading .debug_loc section(1)" );
5315       dl_base = 0;
5316       dl_offset = 0;
5317       while (True) {
5318          UWord  w1, w2;
5319          UWord  len;
5320          if (is_at_end_Cursor( &loc ))
5321             break;
5322
5323          /* Read a (host-)word pair.  This is something of a hack since
5324             the word size to read is really dictated by the ELF file;
5325             however, we assume we're reading a file with the same
5326             word-sizeness as the host.  Reasonably enough. */
5327          w1 = get_UWord( &loc );
5328          w2 = get_UWord( &loc );
5329
5330          if (w1 == 0 && w2 == 0) {
5331             /* end of list.  reset 'base' */
5332             TRACE_D3("    %08lx <End of list>\n", dl_offset);
5333             dl_base = 0;
5334             dl_offset = get_position_of_Cursor( &loc ); /* offset shown for the entries that follow */
5335             continue;
5336          }
5337
5338          if (w1 == -1UL) {
5339             /* new value for 'base' */
5340             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
5341                      dl_offset, w1, w2);
5342             dl_base = w2;
5343             continue;
5344          }
5345
5346          /* else a location expression follows */
5347          TRACE_D3("    %08lx %08lx %08lx ",
5348                   dl_offset, w1 + dl_base, w2 + dl_base);
5349          len = (UWord)get_UShort( &loc ); /* number of expression bytes dumped below */
5350          while (len > 0) {
5351             UChar byte = get_UChar( &loc );
5352             TRACE_D3("%02x", (UInt)byte);
5353             len--;
5354          }
5355          TRACE_SYMTAB("\n");
5356       }
5357    }
5358 #endif
5359 }
5360
5361 /* Trace helper: dump .debug_ranges as rows of "Offset Begin End".
5362    The banner and column heading are traced before the slice is
5363    checked; entries are walked only if ESCN_DEBUG_RANGES is valid.
5364    The Offset column is only refreshed at the end of each list.
5365    BARF and an overrun message are handed to the cursor (presumably
5366    for reporting reads that run off the end of the section). */
5367 static void trace_debug_ranges (const DebugInfo* di,
5368                                 __attribute__((noreturn)) void (*barf)( const HChar* ),
5369                                 DiSlice escn_debug_ranges)
5370 {
5371    Bool   td3 = di->trace_symtab; /* not named again below; presumably
5372                                      picked up by TRACE_D3 */
5373    Cursor cur;            /* walks .debug_ranges */
5374    Addr   base = 0;       /* added to both ends of each range shown */
5375    UWord  list_off = 0;   /* where the list currently being shown began */
5376
5377    TRACE_SYMTAB("\n");
5378    TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
5379    TRACE_SYMTAB("    Offset   Begin    End\n");
5380
5381    /* Without a usable section there is nothing further to show. */
5382    if (!ML_(sli_is_valid)(escn_debug_ranges))
5383       return;
5384
5385    init_Cursor( &cur, escn_debug_ranges, 0, barf,
5386                 "Overrun whilst reading .debug_ranges section(1)" );
5387
5388    /* Entries are consumed as pairs of host-sized words.  Strictly the
5389       word size is dictated by the object file, so this assumes the
5390       file and the host agree on it. */
5391    while (!is_at_end_Cursor( &cur )) {
5392       UWord lo = get_UWord( &cur );
5393       UWord hi = get_UWord( &cur );
5394
5395       if (lo == 0 && hi == 0) {
5396          /* (0,0) closes a list: drop the base and remember where
5397             the next list starts. */
5398          TRACE_D3("    %08lx <End of list>\n", list_off);
5399          base = 0;
5400          list_off = get_position_of_Cursor( &cur );
5401       } else if (lo == -1UL) {
5402          /* A first word equal to -1UL introduces a new base, which
5403             is carried in the second word. */
5404          TRACE_D3("    %08lx %16lx %08lx (base address)\n",
5405                   list_off, lo, hi);
5406          base = hi;
5407       } else {
5408          /* Anything else denotes the range [lo+base, hi+base). */
5409          TRACE_D3("    %08lx %08lx %08lx\n",
5410                   list_off, lo + base, hi + base);
5411       }
5412    }
5413 }
5414
5415 /* Trace helper: dump each abbreviation table found in .debug_abbrev:
5416    a "Number TAG" heading, then per abbreviation its code, tag and
5417    children flag, followed by its attribute name/form pairs. */
5418 static void trace_debug_abbrev (const DebugInfo* di,
5419                                 __attribute__((noreturn)) void (*barf)( const HChar* ),
5420                                 DiSlice escn_debug_abbv)
5421 {
5422    Bool   td3 = di->trace_symtab; /* presumably read by TRACE_D3 */
5423    Cursor cur;                    /* walks .debug_abbrev */
5424
5425    TRACE_SYMTAB("\n");
5426    TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
5427    if (!ML_(sli_is_valid)(escn_debug_abbv))
5428       return;
5429
5430    init_Cursor( &cur, escn_debug_abbv, 0, barf,
5431                 "Overrun whilst reading .debug_abbrev section" );
5432    /* One iteration per abbreviation table. */
5433    while (!is_at_end_Cursor( &cur )) {
5434       ULong code;
5435       TRACE_D3("  Number TAG\n");
5436       /* A zero abbreviation code ends the table. */
5437       while ((code = get_ULEB128( &cur )) != 0) {
5438          ULong tag  = get_ULEB128( &cur );
5439          UInt  kids = get_UChar( &cur );
5440          TRACE_D3("   %llu      %s    [%s]\n",
5441                   code, ML_(pp_DW_TAG)(tag), ML_(pp_DW_children)(kids));
5442          /* Attribute specs run until a (0,0) name/form pair. */
5443          for (;;) {
5444             ULong name = get_ULEB128( &cur );
5445             ULong form = get_ULEB128( &cur );
5446             /* implicit_const has an extra SLEB128 here; discard it. */
5447             if (form == DW_FORM_implicit_const)
5448                (void)get_SLEB128( &cur );
5449             if (name == 0 && form == 0)
5450                break;
5451             TRACE_D3("    %-18s %s\n",
5452                      ML_(pp_DW_AT)(name), ML_(pp_DW_FORM)(form));
5453          }
5454       }
5455    }
5456 }
5457
5458 static
5459 void new_dwarf3_reader_wrk (
5460    DebugInfo* di,
5461    __attribute__((noreturn)) void (*barf)( const HChar* ),
5462    DiSlice escn_debug_info,      DiSlice escn_debug_types,
5463    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
5464    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
5465    DiSlice escn_debug_rnglists,  DiSlice escn_debug_loclists,
5466    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
5467    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
5468    DiSlice escn_debug_str_alt,   DiSlice escn_debug_line_str,
5469    DiSlice escn_debug_addr,      DiSlice escn_debug_str_offsets
5470 )
5471 {
5472    XArray* /* of TyEnt */     tyents = NULL;
5473    XArray* /* of TyEnt */     tyents_to_keep = NULL;
5474    XArray* /* of GExpr* */    gexprs = NULL;
5475    XArray* /* of TempVar* */  tempvars = NULL;
5476    WordFM* /* of (XArray* of AddrRange, void) */ rangestree = NULL;
5477    TyEntIndexCache* tyents_cache = NULL;
5478    TyEntIndexCache* tyents_to_keep_cache = NULL;
5479    TempVar *varp, *varp2;
5480    GExpr* gexpr;
5481    Cursor info; /* primary cursor for parsing .debug_info */
5482    D3TypeParser typarser;
5483    D3VarParser varparser;
5484    D3InlParser inlparser;
5485    XArray* /* of UInt */ fndn_ix_Table = NULL;
5486    ULong debug_line_offset = (ULong) -1;
5487    Word  i, j, n;
5488    Bool td3 = di->trace_symtab;
5489    XArray* /* of TempVar* */ dioff_lookup_tab;
5490    Int pass;
5491    VgHashTable *signature_types = NULL;
5492
5493    /* Display/trace various information, if requested. */
5494    if (TD3) {
5495       trace_debug_loc    (di, barf, escn_debug_loc);
5496       trace_debug_ranges (di, barf, escn_debug_ranges);
5497       trace_debug_abbrev (di, barf, escn_debug_abbv);
5498       TRACE_SYMTAB("\n");
5499    }
5500
5501    /* Zero out all parsers. Parsers will really be initialised
5502       according to VG_(clo_read_*_info). */
5503    VG_(memset)( &inlparser, 0, sizeof(inlparser) );
5504
5505    if (VG_(clo_read_var_info)) {
5506       /* We'll park the harvested type information in here.  Also create
5507          a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
5508          have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
5509          huge and presumably will not occur in any valid DWARF3 file --
5510          it would need to have a .debug_info section 4GB long for that to
5511          happen.  These type entries end up in the DebugInfo. */
5512       tyents = VG_(newXA)( ML_(dinfo_zalloc),
5513                            "di.readdwarf3.ndrw.1 (TyEnt temp array)",
5514                            ML_(dinfo_free), sizeof(TyEnt) );
5515       { TyEnt tyent;
5516         VG_(memset)(&tyent, 0, sizeof(tyent));
5517         tyent.tag   = Te_TyVoid;
5518         tyent.cuOff = D3_FAKEVOID_CUOFF;
5519         tyent.Te.TyVoid.isFake = True;
5520         VG_(addToXA)( tyents, &tyent );
5521       }
5522       { TyEnt tyent;
5523         VG_(memset)(&tyent, 0, sizeof(tyent));
5524         tyent.tag   = Te_UNKNOWN;
5525         tyent.cuOff = D3_INVALID_CUOFF;
5526         VG_(addToXA)( tyents, &tyent );
5527       }
5528
5529       /* This is a tree used to unique-ify the range lists that are
5530          manufactured by parse_var_DIE.  References to the keys in the
5531          tree wind up in .rngMany fields in TempVars.  We'll need to
5532          delete this tree, and the XArrays attached to it, at the end of
5533          this function. */
5534       rangestree = VG_(newFM)( ML_(dinfo_zalloc),
5535                                "di.readdwarf3.ndrw.2 (rangestree)",
5536                                ML_(dinfo_free),
5537                                (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
5538
5539       /* List of variables we're accumulating.  These don't end up in the
5540          DebugInfo; instead their contents are handed to ML_(addVar) and
5541          the list elements are then deleted. */
5542       tempvars = VG_(newXA)( ML_(dinfo_zalloc),
5543                              "di.readdwarf3.ndrw.3 (TempVar*s array)",
5544                              ML_(dinfo_free),
5545                              sizeof(TempVar*) );
5546
5547       /* List of GExprs we're accumulating.  These wind up in the
5548          DebugInfo. */
5549       gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
5550                            ML_(dinfo_free), sizeof(GExpr*) );
5551
5552       /* We need a D3TypeParser to keep track of partially constructed
5553          types.  It'll be discarded as soon as we've completed the CU,
5554          since the resulting information is tipped in to 'tyents' as it
5555          is generated. */
5556       type_parser_init(&typarser);
5557
5558       var_parser_init(&varparser);
5559
5560       signature_types = VG_(HT_construct) ("signature_types");
5561    }
5562
5563    /* Do an initial pass to scan the .debug_types section, if any, and
5564       fill in the signatured types hash table.  This lets us handle
5565       mapping from a type signature to a (cooked) DIE offset directly
5566       in get_Form_contents.  */
5567    if (VG_(clo_read_var_info) && ML_(sli_is_valid)(escn_debug_types)) {
5568       init_Cursor( &info, escn_debug_types, 0, barf,
5569                    "Overrun whilst reading .debug_types section" );
5570       TRACE_D3("\n------ Collecting signatures from "
5571                ".debug_types section ------\n");
5572
5573       abbv_state last_abbv;
5574       last_abbv.debug_abbrev_offset = (ULong) -1;
5575       last_abbv.ht_abbvs = NULL;
5576       while (True) {
5577          UWord   cu_start_offset, cu_offset_now;
5578          CUConst cc;
5579
5580          cu_start_offset = get_position_of_Cursor( &info );
5581          TRACE_D3("\n");
5582          TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
5583          /* parse_CU_header initialises the CU's abbv hash table.  */
5584          parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
5585                           last_abbv, True, False );
5586
5587          /* Needed by cook_die.  */
5588          cc.types_cuOff_bias = escn_debug_info.szB;
5589
5590          record_signatured_type( signature_types, cc.type_signature,
5591                                  cook_die( &cc, cc.type_offset ));
5592
5593          /* Until proven otherwise we assume we don't need the icc9
5594             workaround in this case; see the DIE-reading loop below
5595             for details.  */
5596          cu_offset_now = (cu_start_offset + cc.unit_length
5597                           + (cc.is_dw64 ? 12 : 4));
5598
5599          last_abbv = cc.abbv;
5600
5601          if (cu_offset_now >= escn_debug_types.szB) {
5602             break;
5603          }
5604
5605          set_position_of_Cursor ( &info, cu_offset_now );
5606       }
5607       if (last_abbv.ht_abbvs != NULL)
5608          VG_(HT_destruct) (last_abbv.ht_abbvs, ML_(dinfo_free));
5609    }
5610
5611    /* Perform three DIE-reading passes.  The first pass reads DIEs from
5612       alternate .debug_info (if any), the second pass reads DIEs from
5613       .debug_info, and the third pass reads DIEs from .debug_types.
5614       Moving the body of this loop into a separate function would
5615       require a large number of arguments to be passed in, so it is
5616       kept inline instead.  */
5617    for (pass = 0; pass < 3; ++pass) {
5618       ULong section_size;
5619
5620       if (pass == 0) {
5621          if (!ML_(sli_is_valid)(escn_debug_info_alt))
5622             continue;
5623          /* Now loop over the Compilation Units listed in the alternate
5624             .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
5625             Each compilation unit contains a Compilation Unit Header
5626             followed by precisely one DW_TAG_compile_unit or
5627             DW_TAG_partial_unit DIE. */
5628          init_Cursor( &info, escn_debug_info_alt, 0, barf,
5629                       "Overrun whilst reading alternate .debug_info section" );
5630          section_size = escn_debug_info_alt.szB;
5631
5632          /* Keep track of the last line table we have seen,
5633             it might turn up again.  */
5634          reset_fndn_ix_table(&fndn_ix_Table, &debug_line_offset, (ULong) -1);
5635
5636          TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
5637       } else if (pass == 1) {
5638          /* Now loop over the Compilation Units listed in the .debug_info
5639             section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
5640             unit contains a Compilation Unit Header followed by precisely
5641             one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
5642          init_Cursor( &info, escn_debug_info, 0, barf,
5643                       "Overrun whilst reading .debug_info section" );
5644          section_size = escn_debug_info.szB;
5645
5646          /* Keep track of the last line table we have seen,
5647             it might turn up again.  */
5648          reset_fndn_ix_table(&fndn_ix_Table, &debug_line_offset, (ULong) -1);
5649
5650          TRACE_D3("\n------ Parsing .debug_info section ------\n");
5651       } else {
5652          if (!ML_(sli_is_valid)(escn_debug_types))
5653             continue;
5654          if (!VG_(clo_read_var_info))
5655             continue; // Types not needed when only reading inline info.
5656          init_Cursor( &info, escn_debug_types, 0, barf,
5657                       "Overrun whilst reading .debug_types section" );
5658          section_size = escn_debug_types.szB;
5659
5660          /* Keep track of the last line table we have seen,
5661             it might turn up again.  */
5662          reset_fndn_ix_table(&fndn_ix_Table, &debug_line_offset, (ULong) -1);
5663
5664          TRACE_D3("\n------ Parsing .debug_types section ------\n");
5665       }
5666
5667       abbv_state last_abbv;
5668       last_abbv.debug_abbrev_offset = (ULong) -1;
5669       last_abbv.ht_abbvs = NULL;
5670       while (True) {
5671          ULong   cu_start_offset, cu_offset_now;
5672          CUConst cc;
5673          /* It may be that the stated size of this CU is larger than the
5674             amount of stuff actually in it.  icc9 seems to generate CUs
5675             thusly.  We use these variables to figure out if this is
5676             indeed the case, and if so how many bytes we need to skip to
5677             get to the start of the next CU.  Not skipping those bytes
5678             causes us to misidentify the start of the next CU, and it all
5679             goes badly wrong after that (not surprisingly). */
5680          UWord cu_size_including_IniLen, cu_amount_used;
5681
5682          /* It seems icc9 finishes the DIE info before debug_info_sz
5683             bytes have been used up.  So be flexible, and declare the
5684             sequence complete if there is not enough remaining bytes to
5685             hold even the smallest conceivable CU header.  (11 bytes I
5686             reckon). */
5687          /* JRS 23Jan09: I suspect this is no longer necessary now that
5688             the code below contains a 'while (cu_amount_used <
5689             cu_size_including_IniLen ...'  style loop, which skips over
5690             any leftover bytes at the end of a CU in the case where the
5691             CU's stated size is larger than its actual size (as
5692             determined by reading all its DIEs).  However, for prudence,
5693             I'll leave the following test in place.  I can't see that a
5694             CU header can be smaller than 11 bytes, so I don't think
5695             there's any harm possible through the test -- it just adds
5696             robustness. */
5697          Word avail = get_remaining_length_Cursor( &info );
5698          if (avail < 11) {
5699             if (avail > 0)
5700                TRACE_D3("new_dwarf3_reader_wrk: warning: "
5701                         "%ld unused bytes after end of DIEs\n", avail);
5702             break;
5703          }
5704
5705          if (VG_(clo_read_var_info)) {
5706             /* Check the varparser's stack is in a sane state. */
5707             vg_assert(varparser.sp == -1);
5708             /* Check the typarser's stack is in a sane state. */
5709             vg_assert(typarser.sp == -1);
5710          }
5711
5712          cu_start_offset = get_position_of_Cursor( &info );
5713          TRACE_D3("\n");
5714          TRACE_D3("  Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
5715          /* parse_CU_header initialises the CU's hashtable of abbvs ht_abbvs */
5716          if (pass == 0) {
5717             parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
5718                              last_abbv, False, True );
5719          } else {
5720             parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
5721                              last_abbv, pass == 2, False );
5722          }
5723          cc.escn_debug_str      = pass == 0 ? escn_debug_str_alt
5724                                             : escn_debug_str;
5725          cc.escn_debug_ranges   = escn_debug_ranges;
5726          cc.escn_debug_rnglists = escn_debug_rnglists;
5727          cc.escn_debug_loclists = escn_debug_loclists;
5728          cc.escn_debug_loc      = escn_debug_loc;
5729          cc.escn_debug_line     = pass == 0 ? escn_debug_line_alt
5730                                             : escn_debug_line;
5731          cc.escn_debug_info     = pass == 0 ? escn_debug_info_alt
5732                                             : escn_debug_info;
5733          cc.escn_debug_types    = escn_debug_types;
5734          cc.escn_debug_info_alt = escn_debug_info_alt;
5735          cc.escn_debug_str_alt  = escn_debug_str_alt;
5736          cc.escn_debug_line_str = escn_debug_line_str;
5737          cc.escn_debug_addr     = escn_debug_addr;
5738          cc.escn_debug_str_offsets = escn_debug_str_offsets;
5739          cc.types_cuOff_bias    = escn_debug_info.szB;
5740          cc.alt_cuOff_bias      = escn_debug_info.szB + escn_debug_types.szB;
5741          cc.cu_start_offset     = cu_start_offset;
5742          cc.cu_addr_base        = 0;
5743          cc.cu_has_addr_base    = False;
5744          cc.cu_str_offsets_base = 0;
5745          cc.cu_has_str_offsets_base = False;
5746          cc.cu_rnglists_base = 0;
5747          cc.cu_has_rnglists_base = False;
5748          cc.cu_loclists_base = 0;
5749          cc.cu_has_loclists_base = False;
5750          cc.di = di;
5751          /* The CU's svma can be deduced by looking at the AT_low_pc
5752             value in the top level TAG_compile_unit, which is the topmost
5753             DIE.  We'll leave it for the 'varparser' to acquire that info
5754             and fill it in -- since it is the only party to want to know
5755             it. */
5756          cc.cu_svma_known = False;
5757          cc.cu_svma       = 0;
5758
5759          if (VG_(clo_read_var_info)) {
5760             cc.signature_types = signature_types;
5761
5762             /* Create a fake outermost-level range covering the entire
5763                address range.  So we always have *something* to catch all
5764                variable declarations. */
5765             varstack_push( &cc, &varparser, td3,
5766                            unitary_range_list(0UL, ~0UL),
5767                            -1, False/*isFunc*/, NULL/*fbGX*/ );
5768
5769          }
5770
5771          /* Now read the one-and-only top-level DIE for this CU. */
5772          vg_assert(!VG_(clo_read_var_info) || varparser.sp == 0);
5773          read_DIE( rangestree,
5774                    tyents, tempvars, gexprs,
5775                    &typarser, &varparser, &inlparser,
5776                    &fndn_ix_Table, &debug_line_offset,
5777                    &info, td3, &cc, 0 );
5778
5779          cu_offset_now = get_position_of_Cursor( &info );
5780
5781          if (0) VG_(printf)("Travelled: %llu  size %llu\n",
5782                             cu_offset_now - cc.cu_start_offset,
5783                             cc.unit_length + (cc.is_dw64 ? 12 : 4));
5784
5785          /* How big the CU claims it is .. */
5786          cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
5787          /* .. vs how big we have found it to be */
5788          cu_amount_used = cu_offset_now - cc.cu_start_offset;
5789
5790          if (1) TRACE_D3("offset now %llu, d-i-size %llu\n",
5791                          cu_offset_now, section_size);
5792          if (cu_offset_now > section_size)
5793             barf("toplevel DIEs beyond end of CU");
5794
5795          /* If the CU is bigger than it claims to be, we've got a serious
5796             problem. */
5797          if (cu_amount_used > cu_size_including_IniLen)
5798             barf("CU's actual size appears to be larger than it claims it is");
5799
5800          /* If the CU is smaller than it claims to be, we need to skip some
5801             bytes.  Loop updates cu_offset_new and cu_amount_used. */
5802          while (cu_amount_used < cu_size_including_IniLen
5803                 && get_remaining_length_Cursor( &info ) > 0) {
5804             if (0) VG_(printf)("SKIP\n");
5805             (void)get_UChar( &info );
5806             cu_offset_now = get_position_of_Cursor( &info );
5807             cu_amount_used = cu_offset_now - cc.cu_start_offset;
5808          }
5809
5810          if (VG_(clo_read_var_info)) {
5811             /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
5812                anywhere else at all.  Our fake the-entire-address-space
5813                range is at level -1, so preening to -2 should completely
5814                empty the stack out. */
5815             TRACE_D3("\n");
5816             varstack_preen( &varparser, td3, -2 );
5817             /* Similarly, empty the type stack out. */
5818             typestack_preen( &typarser, td3, -2 );
5819          }
5820
5821          last_abbv = cc.abbv;
5822
5823          if (cu_offset_now == section_size)
5824             break;
5825          /* else keep going */
5826       }
5827       if (last_abbv.ht_abbvs != NULL)
5828          VG_(HT_destruct) (last_abbv.ht_abbvs, ML_(dinfo_free));
5829    }
5830
5831    if (fndn_ix_Table != NULL)
5832       VG_(deleteXA)(fndn_ix_Table);
5833
5834    if (VG_(clo_read_var_info)) {
5835       /* From here on we're post-processing the stuff we got
5836          out of the .debug_info section. */
5837       if (TD3) {
5838          TRACE_D3("\n");
5839          ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
5840          TRACE_D3("\n");
5841          TRACE_D3("------ Compressing type entries ------\n");
5842       }
5843
5844       tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
5845                                         sizeof(TyEntIndexCache) );
5846       ML_(TyEntIndexCache__invalidate)( tyents_cache );
5847       dedup_types( td3, tyents, tyents_cache );
5848       if (TD3) {
5849          TRACE_D3("\n");
5850          ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
5851       }
5852
5853       TRACE_D3("\n");
5854       TRACE_D3("------ Resolving the types of variables ------\n" );
5855       resolve_variable_types( barf, tyents, tyents_cache, tempvars );
5856
5857       /* Copy all the non-INDIR tyents into a new table.  For large
5858          .so's, about 90% of the tyents will by now have been resolved to
5859          INDIRs, and we no longer need them, and so don't need to store
5860          them. */
5861       tyents_to_keep
5862          = VG_(newXA)( ML_(dinfo_zalloc),
5863                        "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
5864                        ML_(dinfo_free), sizeof(TyEnt) );
5865       n = VG_(sizeXA)( tyents );
5866       for (i = 0; i < n; i++) {
5867          TyEnt* ent = VG_(indexXA)( tyents, i );
5868          if (ent->tag != Te_INDIR)
5869             VG_(addToXA)( tyents_to_keep, ent );
5870       }
5871
5872       VG_(deleteXA)( tyents );
5873       tyents = NULL;
5874       ML_(dinfo_free)( tyents_cache );
5875       tyents_cache = NULL;
5876
5877       /* Sort tyents_to_keep so we can lookup in it.  A complete (if
5878          minor) waste of time, since tyents itself is sorted, but
5879          necessary since VG_(lookupXA) refuses to cooperate if we
5880          don't. */
5881       VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
5882       VG_(sortXA)( tyents_to_keep );
5883
5884       /* Enable cacheing on tyents_to_keep */
5885       tyents_to_keep_cache
5886          = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
5887                               sizeof(TyEntIndexCache) );
5888       ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
5889
5890       /* And record the tyents in the DebugInfo.  We do this before
5891          starting to hand variables to ML_(addVar), since if ML_(addVar)
5892          wants to do debug printing (of the types of said vars) then it
5893          will need the tyents.*/
5894       vg_assert(!di->admin_tyents);
5895       di->admin_tyents = tyents_to_keep;
5896
5897       /* Bias all the location expressions. */
5898       TRACE_D3("\n");
5899       TRACE_D3("------ Biasing the location expressions ------\n" );
5900
5901       n = VG_(sizeXA)( gexprs );
5902       for (i = 0; i < n; i++) {
5903          gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
5904          bias_GX( gexpr, di );
5905       }
5906
5907       TRACE_D3("\n");
5908       TRACE_D3("------ Acquired the following variables: ------\n\n");
5909
5910       /* Park (pointers to) all the vars in an XArray, so we can look up
5911          abstract origins quickly.  The array is sorted (hence, looked-up
5912          by) the .dioff fields.  Since the .dioffs should be in strictly
5913          ascending order, there is no need to sort the array after
5914          construction.  The ascendingness is however asserted for. */
5915       dioff_lookup_tab
5916          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
5917                        ML_(dinfo_free),
5918                        sizeof(TempVar*) );
5919
5920       n = VG_(sizeXA)( tempvars );
5921       Word first_primary_var = 0;
5922       for (first_primary_var = 0;
5923            escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
5924            first_primary_var++) {
5925          varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
5926          if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
5927             break;
5928       }
5929       for (i = 0; i < n; i++) {
5930          varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
5931          if (i > first_primary_var) {
5932             varp2 = *(TempVar**)VG_(indexXA)( tempvars,
5933                                               (i + first_primary_var - 1) % n );
5934             /* why should this hold?  Only, I think, because we've
5935                constructed the array by reading .debug_info sequentially,
5936                and so the array .dioff fields should reflect that, and be
5937                strictly ascending. */
5938             vg_assert(varp2->dioff < varp->dioff);
5939          }
5940          VG_(addToXA)( dioff_lookup_tab, &varp );
5941       }
5942       VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
5943       VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
5944
5945       /* Now visit each var.  Collect up as much info as possible for
5946          each var and hand it to ML_(addVar). */
5947       n = VG_(sizeXA)( tempvars );
5948       for (j = 0; j < n; j++) {
5949          TyEnt* ent;
5950          varp = *(TempVar**)VG_(indexXA)( tempvars, j );
5951
5952          /* Possibly show .. */
5953          if (TD3) {
5954             VG_(printf)("<%lx> addVar: level %d: %s :: ",
5955                         varp->dioff,
5956                         varp->level,
5957                         varp->name ? varp->name : "<anon_var>" );
5958             if (varp->typeR) {
5959                ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
5960             } else {
5961                VG_(printf)("NULL");
5962             }
5963             VG_(printf)("\n  Loc=");
5964             if (varp->gexpr) {
5965                ML_(pp_GX)(varp->gexpr);
5966             } else {
5967                VG_(printf)("NULL");
5968             }
5969             VG_(printf)("\n");
5970             if (varp->fbGX) {
5971                VG_(printf)("  FrB=");
5972                ML_(pp_GX)( varp->fbGX );
5973                VG_(printf)("\n");
5974             } else {
5975                VG_(printf)("  FrB=none\n");
5976             }
5977             VG_(printf)("  declared at: %u %s:%d\n",
5978                         varp->fndn_ix,
5979                         ML_(fndn_ix2filename) (di, varp->fndn_ix),
5980                         varp->fLine );
5981             if (varp->absOri != (UWord)D3_INVALID_CUOFF)
5982                VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
5983          }
5984
5985          /* Skip variables which have no location.  These must be
5986             abstract instances; they are useless as-is since with no
5987             location they have no specified memory location.  They will
5988             presumably be referred to via the absOri fields of other
5989             variables. */
5990          if (!varp->gexpr) {
5991             TRACE_D3("  SKIP (no location)\n\n");
5992             continue;
5993          }
5994
5995          /* So it has a location, at least.  If it refers to some other
5996             entry through its absOri field, pull in further info through
5997             that. */
5998          if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
5999             Bool found;
6000             Word ixFirst, ixLast;
6001             TempVar key;
6002             TempVar* keyp = &key;
6003             TempVar *varAI;
6004             VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
6005             key.dioff = varp->absOri; /* this is what we want to find */
6006             found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
6007                                    &ixFirst, &ixLast );
6008             if (!found) {
6009                /* barf("DW_AT_abstract_origin can't be resolved"); */
6010                TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
6011                continue;
6012             }
6013             /* If the following fails, there is more than one entry with
6014                the same dioff.  Which can't happen. */
6015             vg_assert(ixFirst == ixLast);
6016             varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
6017             /* stay sane */
6018             vg_assert(varAI);
6019             vg_assert(varAI->dioff == varp->absOri);
6020
6021             /* Copy what useful info we can. */
6022             if (varAI->typeR && !varp->typeR)
6023                varp->typeR = varAI->typeR;
6024             if (varAI->name && !varp->name)
6025                varp->name = varAI->name;
6026             if (varAI->fndn_ix && !varp->fndn_ix)
6027                varp->fndn_ix = varAI->fndn_ix;
6028             if (varAI->fLine > 0 && varp->fLine == 0)
6029                varp->fLine = varAI->fLine;
6030          }
6031
6032          /* Give it a name if it doesn't have one. */
6033          if (!varp->name)
6034             varp->name = ML_(addStr)( di, "<anon_var>", -1 );
6035
6036          /* So now does it have enough info to be useful? */
6037          /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
6038             the type didn't get resolved.  Really, in that case
6039             something's broken earlier on, and should be fixed, rather
6040             than just skipping the variable. */
6041          ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
6042                                             tyents_to_keep_cache,
6043                                             varp->typeR );
6044          /* The next two assertions should be guaranteed by
6045             our previous call to resolve_variable_types. */
6046          vg_assert(ent);
6047          vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
6048
6049          if (ent->tag == Te_UNKNOWN) continue;
6050
6051          vg_assert(varp->gexpr);
6052          vg_assert(varp->name);
6053          vg_assert(varp->typeR);
6054          vg_assert(varp->level >= 0);
6055
6056          /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
6057             each address range in which the variable exists. */
6058          TRACE_D3("  ACQUIRE for range(s) ");
6059          { AddrRange  oneRange;
6060            AddrRange* varPcRanges;
6061            Word       nVarPcRanges;
6062            /* Set up to iterate over address ranges, however
6063               represented. */
6064            if (varp->nRanges == 0 || varp->nRanges == 1) {
6065               vg_assert(!varp->rngMany);
6066               if (varp->nRanges == 0) {
6067                  vg_assert(varp->rngOneMin == 0);
6068                  vg_assert(varp->rngOneMax == 0);
6069               }
6070               nVarPcRanges = varp->nRanges;
6071               oneRange.aMin = varp->rngOneMin;
6072               oneRange.aMax = varp->rngOneMax;
6073               varPcRanges = &oneRange;
6074            } else {
6075               vg_assert(varp->rngMany);
6076               vg_assert(varp->rngOneMin == 0);
6077               vg_assert(varp->rngOneMax == 0);
6078               nVarPcRanges = VG_(sizeXA)(varp->rngMany);
6079               vg_assert(nVarPcRanges >= 2);
6080               vg_assert(nVarPcRanges == (Word)varp->nRanges);
6081               varPcRanges = VG_(indexXA)(varp->rngMany, 0);
6082            }
6083            if (varp->level == 0)
6084               vg_assert( nVarPcRanges == 1 );
6085            /* and iterate */
6086            for (i = 0; i < nVarPcRanges; i++) {
6087               Addr pcMin = varPcRanges[i].aMin;
6088               Addr pcMax = varPcRanges[i].aMax;
6089               vg_assert(pcMin <= pcMax);
6090               /* Level 0 is the global address range.  So at level 0 we
6091                  don't want to bias pcMin/pcMax; but at all other levels
6092                  we do since those are derived from svmas in the Dwarf
6093                  we're reading.  Be paranoid ... */
6094               if (varp->level == 0) {
6095                  vg_assert(pcMin == (Addr)0);
6096                  vg_assert(pcMax == ~(Addr)0);
6097               } else {
6098                  /* vg_assert(pcMin > (Addr)0);
6099                     No .. we can legitimately expect to see ranges like
6100                     0x0-0x11D (pre-biasing, of course). */
6101                  vg_assert(pcMax < ~(Addr)0);
6102               }
6103
6104               /* Apply text biasing, for non-global variables. */
6105               if (varp->level > 0) {
6106                  pcMin += di->text_debug_bias;
6107                  pcMax += di->text_debug_bias;
6108               }
6109
6110               if (i > 0 && (i%2) == 0)
6111                  TRACE_D3("\n                       ");
6112               TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
6113
6114               ML_(addVar)(
6115                  di, varp->level,
6116                      pcMin, pcMax,
6117                      varp->name,  varp->typeR,
6118                      varp->gexpr, varp->fbGX,
6119                      varp->fndn_ix, varp->fLine, td3
6120               );
6121            }
6122          }
6123
6124          TRACE_D3("\n\n");
6125          /* and move on to the next var */
6126       }
6127
6128       /* Now free all the TempVars */
6129       n = VG_(sizeXA)( tempvars );
6130       for (i = 0; i < n; i++) {
6131          varp = *(TempVar**)VG_(indexXA)( tempvars, i );
6132          ML_(dinfo_free)(varp);
6133       }
6134       VG_(deleteXA)( tempvars );
6135       tempvars = NULL;
6136
6137       /* and the temp lookup table */
6138       VG_(deleteXA)( dioff_lookup_tab );
6139
6140       /* and the ranges tree.  Note that we need to also free the XArrays
6141          which constitute the keys, hence pass VG_(deleteXA) as a
6142          key-finalizer. */
6143       VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
6144
6145       /* and the tyents_to_keep cache */
6146       ML_(dinfo_free)( tyents_to_keep_cache );
6147       tyents_to_keep_cache = NULL;
6148
6149       /* And the signatured type hash.  */
6150       VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
6151
6152       /* record the GExprs in di so they can be freed later */
6153       vg_assert(!di->admin_gexprs);
6154       di->admin_gexprs = gexprs;
6155    }
6156
6157    // Free up dynamically allocated memory
6158    if (VG_(clo_read_var_info)) {
6159       type_parser_release(&typarser);
6160       var_parser_release(&varparser);
6161    }
6162 }
6163
6164
6165 /*------------------------------------------------------------*/
6166 /*---                                                      ---*/
6167 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
6168 /*---                                                      ---*/
6169 /*------------------------------------------------------------*/
6170 /* Error-recovery state shared by barf() and ML_(new_dwarf3_reader). */
6171 static Bool               d3rd_jmpbuf_valid  = False; /* True while the worker may call barf() */
6172 static const HChar*       d3rd_jmpbuf_reason = NULL;  /* message stored by barf() before it jumps */
6173 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);               /* filled by VG_MINIMAL_SETJMP, used by barf() */
6174 /* Abandon the DWARF3 read: store 'reason' and longjmp to d3rd_jmpbuf.  Never returns. */
6175 static __attribute__((noreturn)) void barf ( const HChar* reason ) {
6176    vg_assert(d3rd_jmpbuf_valid);     /* a jump target must have been set up first */
6177    d3rd_jmpbuf_reason = reason;      /* read by ML_(new_dwarf3_reader) after the jump */
6178    VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
6179    /*NOTREACHED*/
6180    vg_assert(0);
6181 }
6182 /* Top-level entry: run new_dwarf3_reader_wrk on 'di' and the given section slices. */
6183 /* If the worker bails out via barf(), the stored reason is passed to ML_(symerr). */
6184 void
6185 ML_(new_dwarf3_reader) (
6186    DebugInfo* di,
6187    DiSlice escn_debug_info,      DiSlice escn_debug_types,
6188    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
6189    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
6190    DiSlice escn_debug_rnglists,  DiSlice escn_debug_loclists,
6191    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
6192    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
6193    DiSlice escn_debug_str_alt,   DiSlice escn_debug_line_str,
6194    DiSlice escn_debug_addr,      DiSlice escn_debug_str_offsets
6195 )
6196 {
6197    volatile Int  jumped;   /* result of VG_MINIMAL_SETJMP below */
6198    volatile Bool td3 = di->trace_symtab;   /* presumably consulted by TRACE_D3 -- confirm */
6199
6200    /* Run the _wrk function to read the dwarf3.  If it succeeds, it
6201       just returns normally.  If there is any failure, it longjmp's
6202       back here, having first set d3rd_jmpbuf_reason to something
6203       useful. */
6204    vg_assert(d3rd_jmpbuf_valid  == False);   /* shared state must be idle on entry */
6205    vg_assert(d3rd_jmpbuf_reason == NULL);
6206
6207    d3rd_jmpbuf_valid = True;
6208    jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);   /* 0 on the direct path, nonzero after a jump */
6209    if (jumped == 0) {
6210       /* try this ... */
6211       new_dwarf3_reader_wrk( di, barf,
6212                              escn_debug_info,     escn_debug_types,
6213                              escn_debug_abbv,     escn_debug_line,
6214                              escn_debug_str,      escn_debug_ranges,
6215                              escn_debug_rnglists, escn_debug_loclists,
6216                              escn_debug_loc,      escn_debug_info_alt,
6217                              escn_debug_abbv_alt, escn_debug_line_alt,
6218                              escn_debug_str_alt,  escn_debug_line_str,
6219                              escn_debug_addr,     escn_debug_str_offsets );
6220       d3rd_jmpbuf_valid = False;   /* worker returned normally */
6221       TRACE_D3("\n------ .debug_info reading was successful ------\n");
6222    } else {
6223       /* It longjmp'd. */
6224       d3rd_jmpbuf_valid = False;
6225       /* Can't longjmp without giving some sort of reason. */
6226       vg_assert(d3rd_jmpbuf_reason != NULL);
6227
6228       TRACE_D3("\n------ .debug_info reading failed ------\n");
6229
6230       ML_(symerr)(di, True, d3rd_jmpbuf_reason);
6231    }
6232    /* Return the shared state to idle so the entry assertions hold on the next call. */
6233    d3rd_jmpbuf_valid  = False;
6234    d3rd_jmpbuf_reason = NULL;
6235 }
6236
6237
6238
6239 /* --- Unused code fragments which might be useful one day. --- */
6240
6241 #if 0
6242    /* Disabled sketch: walk .debug_aranges and trace each table; it only prints what it reads. */
6243    TRACE_SYMTAB("\n");
6244    TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
6245    init_Cursor( &aranges, debug_aranges_img,
6246                 debug_aranges_sz, 0, barf,
6247                 "Overrun whilst reading .debug_aranges section" );
6248    while (True) {
6249       ULong  len, d_i_offset;
6250       Bool   is64;
6251       UShort version;
6252       UChar  asize, segsize;
6253       /* Leave the outer loop once the cursor reports end of input. */
6254       if (is_at_end_Cursor( &aranges ))
6255          break;
6256       /* Read one table header, field by field: */
6257       /* initial_length field */
6258       len = get_Initial_Length( &is64, &aranges,
6259                "in .debug_aranges: invalid initial-length field" );
6260       version    = get_UShort( &aranges );
6261       d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
6262       asize      = get_UChar( &aranges );
6263       segsize    = get_UChar( &aranges );
6264       TRACE_D3("  Length:                   %llu\n", len);
6265       TRACE_D3("  Version:                  %d\n", (Int)version);
6266       TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
6267       TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
6268       TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
6269       TRACE_D3("\n");
6270       TRACE_D3("    Address            Length\n");
6271       /* Discard bytes until the cursor position is a multiple of 2*asize. */
6272       while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) { /* NOTE(review): asize==0 => modulo by zero */
6273          (void)get_UChar( & aranges );
6274       }
6275       while (True) { /* trace (address, length) pairs */
6276          ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
6277          ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
6278          TRACE_D3("    0x%016llx 0x%llx\n", address, length);
6279          if (address == 0 && length == 0) break; /* an all-zero pair ends the inner loop */
6280       }
6281    }
6282    TRACE_SYMTAB("\n");
6283 #endif
6284
6285 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_freebsd)
6286
6287 /*--------------------------------------------------------------------*/
6288 /*--- end                                                          ---*/
6289 /*--------------------------------------------------------------------*/