gdb/dwarf-index-write.c

   1 /* DWARF index writing support for GDB.
   2
   3    Copyright (C) 1994-2020 Free Software Foundation, Inc.
   4
   5    This file is part of GDB.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #include "defs.h"
  21
  22 #include "dwarf-index-write.h"
  23
  24 #include "addrmap.h"
  25 #include "cli/cli-decode.h"
  26 #include "gdbsupport/byte-vector.h"
  27 #include "gdbsupport/filestuff.h"
  28 #include "gdbsupport/gdb_unlinker.h"
  29 #include "gdbsupport/pathstuff.h"
  30 #include "gdbsupport/scoped_fd.h"
  31 #include "complaints.h"
  32 #include "dwarf-index-common.h"
  33 #include "dwarf2.h"
  34 #include "dwarf2read.h"
  35 #include "gdb/gdb-index.h"
  36 #include "gdbcmd.h"
  37 #include "objfiles.h"
  38 #include "psympriv.h"
  39 #include "ada-lang.h"
  40
  41 #include <algorithm>
  42 #include <cmath>
  43 #include <set>
  44 #include <unordered_map>
  45 #include <unordered_set>
  46
/* Ensure only legit values are used: assert that VALUE, converted to
   unsigned int, is 0 or 1, then hand CU_INDEX and VALUE on to
   GDB_INDEX_SYMBOL_STATIC_SET_VALUE.  VALUE is expanded twice, so
   the argument must be free of side effects.  */
#define DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE(cu_index, value) \
  do { \
    gdb_assert ((unsigned int) (value) <= 1); \
    GDB_INDEX_SYMBOL_STATIC_SET_VALUE((cu_index), (value)); \
  } while (0)
  53
/* Ensure only legit values are used: assert that VALUE lies in the
   inclusive range GDB_INDEX_SYMBOL_KIND_TYPE ..
   GDB_INDEX_SYMBOL_KIND_OTHER, then hand CU_INDEX and VALUE on to
   GDB_INDEX_SYMBOL_KIND_SET_VALUE.  VALUE is expanded three times, so
   the argument must be free of side effects.  */
#define DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE(cu_index, value) \
  do { \
    gdb_assert ((value) >= GDB_INDEX_SYMBOL_KIND_TYPE \
                && (value) <= GDB_INDEX_SYMBOL_KIND_OTHER); \
    GDB_INDEX_SYMBOL_KIND_SET_VALUE((cu_index), (value)); \
  } while (0)
  61
/* Ensure we don't use more than the allotted number of bits for the CU:
   assert that VALUE has no bits set outside GDB_INDEX_CU_MASK, then
   hand CU_INDEX and VALUE on to GDB_INDEX_CU_SET_VALUE.  VALUE is
   expanded twice, so the argument must be free of side effects.  */
#define DW2_GDB_INDEX_CU_SET_VALUE(cu_index, value) \
  do { \
    gdb_assert (((value) & ~GDB_INDEX_CU_MASK) == 0); \
    GDB_INDEX_CU_SET_VALUE((cu_index), (value)); \
  } while (0)
  68
  69 /* The "save gdb-index" command.  */
  70
  71 /* Write SIZE bytes from the buffer pointed to by DATA to FILE, with
  72    error checking.  */
  73
  74 static void
  75 file_write (FILE *file, const void *data, size_t size)
  76 {
  77   if (fwrite (data, 1, size, file) != size)
  78     error (_("couldn't data write to file"));
  79 }
  80
  81 /* Write the contents of VEC to FILE, with error checking.  */
  82
  83 template<typename Elem, typename Alloc>
  84 static void
  85 file_write (FILE *file, const std::vector<Elem, Alloc> &vec)
  86 {
  87   if (!vec.empty ())
  88     file_write (file, vec.data (), vec.size () * sizeof (vec[0]));
  89 }
  90
  91 /* In-memory buffer to prepare data to be written later to a file.  */
  92 class data_buf
  93 {
  94 public:
  95   /* Copy DATA to the end of the buffer.  */
  96   template<typename T>
  97   void append_data (const T &data)
  98   {
  99     std::copy (reinterpret_cast<const gdb_byte *> (&data),
 100                reinterpret_cast<const gdb_byte *> (&data + 1),
 101                grow (sizeof (data)));
 102   }
 103
 104   /* Copy CSTR (a zero-terminated string) to the end of buffer.  The
 105      terminating zero is appended too.  */
 106   void append_cstr0 (const char *cstr)
 107   {
 108     const size_t size = strlen (cstr) + 1;
 109     std::copy (cstr, cstr + size, grow (size));
 110   }
 111
 112   /* Store INPUT as ULEB128 to the end of buffer.  */
 113   void append_unsigned_leb128 (ULONGEST input)
 114   {
 115     for (;;)
 116       {
 117         gdb_byte output = input & 0x7f;
 118         input >>= 7;
 119         if (input)
 120           output |= 0x80;
 121         append_data (output);
 122         if (input == 0)
 123           break;
 124       }
 125   }
 126
 127   /* Accept a host-format integer in VAL and append it to the buffer
 128      as a target-format integer which is LEN bytes long.  */
 129   void append_uint (size_t len, bfd_endian byte_order, ULONGEST val)
 130   {
 131     ::store_unsigned_integer (grow (len), len, byte_order, val);
 132   }
 133
 134   /* Return the size of the buffer.  */
 135   size_t size () const
 136   {
 137     return m_vec.size ();
 138   }
 139
 140   /* Return true iff the buffer is empty.  */
 141   bool empty () const
 142   {
 143     return m_vec.empty ();
 144   }
 145
 146   /* Write the buffer to FILE.  */
 147   void file_write (FILE *file) const
 148   {
 149     ::file_write (file, m_vec);
 150   }
 151
 152 private:
 153   /* Grow SIZE bytes at the end of the buffer.  Returns a pointer to
 154      the start of the new block.  */
 155   gdb_byte *grow (size_t size)
 156   {
 157     m_vec.resize (m_vec.size () + size);
 158     return &*(m_vec.end () - size);
 159   }
 160
 161   gdb::byte_vector m_vec;
 162 };
 163
/* An entry in the symbol table.  A slot whose NAME is NULL is unused
   (find_slot and hash_expand both treat it that way).  */
struct symtab_index_entry
{
  /* The name of the symbol.  Only the pointer is stored; the string
     itself is not copied.  */
  const char *name;
  /* The offset, in the constant pool, of this entry's CU vector.  It
     is assigned by write_hash_table, which asserts that it is still
     zero beforehand.  */
  offset_type index_offset;
  /* The CUs that hold an object of this name.  Each element is the
     combined CU-index-and-attributes word built by add_index_entry.
     The vector is sorted and free of duplicates only once
     uniquify_cu_indices has run.  */
  std::vector<offset_type> cu_indices;
};
 175
 176 /* The symbol table.  This is a power-of-2-sized hash table.  */
 177 struct mapped_symtab
 178 {
 179   mapped_symtab ()
 180   {
 181     data.resize (1024);
 182   }
 183
 184   offset_type n_elements = 0;
 185   std::vector<symtab_index_entry> data;
 186 };
 187
 188 /* Find a slot in SYMTAB for the symbol NAME.  Returns a reference to
 189    the slot.
 190
 191    Function is used only during write_hash_table so no index format backward
 192    compatibility is needed.  */
 193
 194 static symtab_index_entry &
 195 find_slot (struct mapped_symtab *symtab, const char *name)
 196 {
 197   offset_type index, step, hash = mapped_index_string_hash (INT_MAX, name);
 198
 199   index = hash & (symtab->data.size () - 1);
 200   step = ((hash * 17) & (symtab->data.size () - 1)) | 1;
 201
 202   for (;;)
 203     {
 204       if (symtab->data[index].name == NULL
 205           || strcmp (name, symtab->data[index].name) == 0)
 206         return symtab->data[index];
 207       index = (index + step) & (symtab->data.size () - 1);
 208     }
 209 }
 210
 211 /* Expand SYMTAB's hash table.  */
 212
 213 static void
 214 hash_expand (struct mapped_symtab *symtab)
 215 {
 216   auto old_entries = std::move (symtab->data);
 217
 218   symtab->data.clear ();
 219   symtab->data.resize (old_entries.size () * 2);
 220
 221   for (auto &it : old_entries)
 222     if (it.name != NULL)
 223       {
 224         auto &ref = find_slot (symtab, it.name);
 225         ref = std::move (it);
 226       }
 227 }
 228
 229 /* Add an entry to SYMTAB.  NAME is the name of the symbol.
 230    CU_INDEX is the index of the CU in which the symbol appears.
 231    IS_STATIC is one if the symbol is static, otherwise zero (global).  */
 232
 233 static void
 234 add_index_entry (struct mapped_symtab *symtab, const char *name,
 235                  int is_static, gdb_index_symbol_kind kind,
 236                  offset_type cu_index)
 237 {
 238   offset_type cu_index_and_attrs;
 239
 240   ++symtab->n_elements;
 241   if (4 * symtab->n_elements / 3 >= symtab->data.size ())
 242     hash_expand (symtab);
 243
 244   symtab_index_entry &slot = find_slot (symtab, name);
 245   if (slot.name == NULL)
 246     {
 247       slot.name = name;
 248       /* index_offset is set later.  */
 249     }
 250
 251   cu_index_and_attrs = 0;
 252   DW2_GDB_INDEX_CU_SET_VALUE (cu_index_and_attrs, cu_index);
 253   DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE (cu_index_and_attrs, is_static);
 254   DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE (cu_index_and_attrs, kind);
 255
 256   /* We don't want to record an index value twice as we want to avoid the
 257      duplication.
 258      We process all global symbols and then all static symbols
 259      (which would allow us to avoid the duplication by only having to check
 260      the last entry pushed), but a symbol could have multiple kinds in one CU.
 261      To keep things simple we don't worry about the duplication here and
 262      sort and uniquify the list after we've processed all symbols.  */
 263   slot.cu_indices.push_back (cu_index_and_attrs);
 264 }
 265
 266 /* Sort and remove duplicates of all symbols' cu_indices lists.  */
 267
 268 static void
 269 uniquify_cu_indices (struct mapped_symtab *symtab)
 270 {
 271   for (auto &entry : symtab->data)
 272     {
 273       if (entry.name != NULL && !entry.cu_indices.empty ())
 274         {
 275           auto &cu_indices = entry.cu_indices;
 276           std::sort (cu_indices.begin (), cu_indices.end ());
 277           auto from = std::unique (cu_indices.begin (), cu_indices.end ());
 278           cu_indices.erase (from, cu_indices.end ());
 279         }
 280     }
 281 }
 282
 283 /* A form of 'const char *' suitable for container keys.  Only the
 284    pointer is stored.  The strings themselves are compared, not the
 285    pointers.  */
 286 class c_str_view
 287 {
 288 public:
 289   c_str_view (const char *cstr)
 290     : m_cstr (cstr)
 291   {}
 292
 293   bool operator== (const c_str_view &other) const
 294   {
 295     return strcmp (m_cstr, other.m_cstr) == 0;
 296   }
 297
 298   /* Return the underlying C string.  Note, the returned string is
 299      only a reference with lifetime of this object.  */
 300   const char *c_str () const
 301   {
 302     return m_cstr;
 303   }
 304
 305 private:
 306   friend class c_str_view_hasher;
 307   const char *const m_cstr;
 308 };
 309
 310 /* A std::unordered_map::hasher for c_str_view that uses the right
 311    hash function for strings in a mapped index.  */
 312 class c_str_view_hasher
 313 {
 314 public:
 315   size_t operator () (const c_str_view &x) const
 316   {
 317     return mapped_index_string_hash (INT_MAX, x.m_cstr);
 318   }
 319 };
 320
 321 /* A std::unordered_map::hasher for std::vector<>.  */
 322 template<typename T>
 323 class vector_hasher
 324 {
 325 public:
 326   size_t operator () (const std::vector<T> &key) const
 327   {
 328     return iterative_hash (key.data (),
 329                            sizeof (key.front ()) * key.size (), 0);
 330   }
 331 };
 332
 333 /* Write the mapped hash table SYMTAB to the data buffer OUTPUT, with
 334    constant pool entries going into the data buffer CPOOL.  */
 335
 336 static void
 337 write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
 338 {
 339   {
 340     /* Elements are sorted vectors of the indices of all the CUs that
 341        hold an object of this name.  */
 342     std::unordered_map<std::vector<offset_type>, offset_type,
 343                        vector_hasher<offset_type>>
 344       symbol_hash_table;
 345
 346     /* We add all the index vectors to the constant pool first, to
 347        ensure alignment is ok.  */
 348     for (symtab_index_entry &entry : symtab->data)
 349       {
 350         if (entry.name == NULL)
 351           continue;
 352         gdb_assert (entry.index_offset == 0);
 353
 354         /* Finding before inserting is faster than always trying to
 355            insert, because inserting always allocates a node, does the
 356            lookup, and then destroys the new node if another node
 357            already had the same key.  C++17 try_emplace will avoid
 358            this.  */
 359         const auto found
 360           = symbol_hash_table.find (entry.cu_indices);
 361         if (found != symbol_hash_table.end ())
 362           {
 363             entry.index_offset = found->second;
 364             continue;
 365           }
 366
 367         symbol_hash_table.emplace (entry.cu_indices, cpool.size ());
 368         entry.index_offset = cpool.size ();
 369         cpool.append_data (MAYBE_SWAP (entry.cu_indices.size ()));
 370         for (const auto index : entry.cu_indices)
 371           cpool.append_data (MAYBE_SWAP (index));
 372       }
 373   }
 374
 375   /* Now write out the hash table.  */
 376   std::unordered_map<c_str_view, offset_type, c_str_view_hasher> str_table;
 377   for (const auto &entry : symtab->data)
 378     {
 379       offset_type str_off, vec_off;
 380
 381       if (entry.name != NULL)
 382         {
 383           const auto insertpair = str_table.emplace (entry.name, cpool.size ());
 384           if (insertpair.second)
 385             cpool.append_cstr0 (entry.name);
 386           str_off = insertpair.first->second;
 387           vec_off = entry.index_offset;
 388         }
 389       else
 390         {
 391           /* While 0 is a valid constant pool index, it is not valid
 392              to have 0 for both offsets.  */
 393           str_off = 0;
 394           vec_off = 0;
 395         }
 396
 397       output.append_data (MAYBE_SWAP (str_off));
 398       output.append_data (MAYBE_SWAP (vec_off));
 399     }
 400 }
 401
 402 typedef std::unordered_map<partial_symtab *, unsigned int> psym_index_map;
 403
 404 /* Helper struct for building the address table.  */
 405 struct addrmap_index_data
 406 {
 407   addrmap_index_data (data_buf &addr_vec_, psym_index_map &cu_index_htab_)
 408     : addr_vec (addr_vec_), cu_index_htab (cu_index_htab_)
 409   {}
 410
 411   struct objfile *objfile;
 412   data_buf &addr_vec;
 413   psym_index_map &cu_index_htab;
 414
 415   /* Non-zero if the previous_* fields are valid.
 416      We can't write an entry until we see the next entry (since it is only then
 417      that we know the end of the entry).  */
 418   int previous_valid;
 419   /* Index of the CU in the table of all CUs in the index file.  */
 420   unsigned int previous_cu_index;
 421   /* Start address of the CU.  */
 422   CORE_ADDR previous_cu_start;
 423 };
 424
 425 /* Write an address entry to ADDR_VEC.  */
 426
 427 static void
 428 add_address_entry (struct objfile *objfile, data_buf &addr_vec,
 429                    CORE_ADDR start, CORE_ADDR end, unsigned int cu_index)
 430 {
 431   addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start);
 432   addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end);
 433   addr_vec.append_data (MAYBE_SWAP (cu_index));
 434 }
 435
 436 /* Worker function for traversing an addrmap to build the address table.  */
 437
 438 static int
 439 add_address_entry_worker (void *datap, CORE_ADDR start_addr, void *obj)
 440 {
 441   struct addrmap_index_data *data = (struct addrmap_index_data *) datap;
 442   struct partial_symtab *pst = (struct partial_symtab *) obj;
 443
 444   if (data->previous_valid)
 445     add_address_entry (data->objfile, data->addr_vec,
 446                        data->previous_cu_start, start_addr,
 447                        data->previous_cu_index);
 448
 449   data->previous_cu_start = start_addr;
 450   if (pst != NULL)
 451     {
 452       const auto it = data->cu_index_htab.find (pst);
 453       gdb_assert (it != data->cu_index_htab.cend ());
 454       data->previous_cu_index = it->second;
 455       data->previous_valid = 1;
 456     }
 457   else
 458     data->previous_valid = 0;
 459
 460   return 0;
 461 }
 462
 463 /* Write OBJFILE's address map to ADDR_VEC.
 464    CU_INDEX_HTAB is used to map addrmap entries to their CU indices
 465    in the index file.  */
 466
 467 static void
 468 write_address_map (struct objfile *objfile, data_buf &addr_vec,
 469                    psym_index_map &cu_index_htab)
 470 {
 471   struct addrmap_index_data addrmap_index_data (addr_vec, cu_index_htab);
 472
 473   /* When writing the address table, we have to cope with the fact that
 474      the addrmap iterator only provides the start of a region; we have to
 475      wait until the next invocation to get the start of the next region.  */
 476
 477   addrmap_index_data.objfile = objfile;
 478   addrmap_index_data.previous_valid = 0;
 479
 480   addrmap_foreach (objfile->partial_symtabs->psymtabs_addrmap,
 481                    add_address_entry_worker, &addrmap_index_data);
 482
 483   /* It's highly unlikely the last entry (end address = 0xff...ff)
 484      is valid, but we should still handle it.
 485      The end address is recorded as the start of the next region, but that
 486      doesn't work here.  To cope we pass 0xff...ff, this is a rare situation
 487      anyway.  */
 488   if (addrmap_index_data.previous_valid)
 489     add_address_entry (objfile, addr_vec,
 490                        addrmap_index_data.previous_cu_start, (CORE_ADDR) -1,
 491                        addrmap_index_data.previous_cu_index);
 492 }
 493
 494 /* Return the symbol kind of PSYM.  */
 495
 496 static gdb_index_symbol_kind
 497 symbol_kind (struct partial_symbol *psym)
 498 {
 499   domain_enum domain = psym->domain;
 500   enum address_class aclass = psym->aclass;
 501
 502   switch (domain)
 503     {
 504     case VAR_DOMAIN:
 505       switch (aclass)
 506         {
 507         case LOC_BLOCK:
 508           return GDB_INDEX_SYMBOL_KIND_FUNCTION;
 509         case LOC_TYPEDEF:
 510           return GDB_INDEX_SYMBOL_KIND_TYPE;
 511         case LOC_COMPUTED:
 512         case LOC_CONST_BYTES:
 513         case LOC_OPTIMIZED_OUT:
 514         case LOC_STATIC:
 515           return GDB_INDEX_SYMBOL_KIND_VARIABLE;
 516         case LOC_CONST:
 517           /* Note: It's currently impossible to recognize psyms as enum values
 518              short of reading the type info.  For now punt.  */
 519           return GDB_INDEX_SYMBOL_KIND_VARIABLE;
 520         default:
 521           /* There are other LOC_FOO values that one might want to classify
 522              as variables, but dwarf2read.c doesn't currently use them.  */
 523           return GDB_INDEX_SYMBOL_KIND_OTHER;
 524         }
 525     case STRUCT_DOMAIN:
 526       return GDB_INDEX_SYMBOL_KIND_TYPE;
 527     default:
 528       return GDB_INDEX_SYMBOL_KIND_OTHER;
 529     }
 530 }
 531
 532 /* Add a list of partial symbols to SYMTAB.  */
 533
 534 static void
 535 write_psymbols (struct mapped_symtab *symtab,
 536                 std::unordered_set<partial_symbol *> &psyms_seen,
 537                 struct partial_symbol **psymp,
 538                 int count,
 539                 offset_type cu_index,
 540                 int is_static)
 541 {
 542   for (; count-- > 0; ++psymp)
 543     {
 544       struct partial_symbol *psym = *psymp;
 545
 546       if (psym->ginfo.language () == language_ada)
 547         error (_("Ada is not currently supported by the index; "
 548                  "use the DWARF 5 index instead"));
 549
 550       /* Only add a given psymbol once.  */
 551       if (psyms_seen.insert (psym).second)
 552         {
 553           gdb_index_symbol_kind kind = symbol_kind (psym);
 554
 555           add_index_entry (symtab, psym->ginfo.search_name (),
 556                            is_static, kind, cu_index);
 557         }
 558     }
 559 }
 560
 561 /* A helper struct used when iterating over debug_types.  */
 562 struct signatured_type_index_data
 563 {
 564   signatured_type_index_data (data_buf &types_list_,
 565                               std::unordered_set<partial_symbol *> &psyms_seen_)
 566     : types_list (types_list_), psyms_seen (psyms_seen_)
 567   {}
 568
 569   struct objfile *objfile;
 570   struct mapped_symtab *symtab;
 571   data_buf &types_list;
 572   std::unordered_set<partial_symbol *> &psyms_seen;
 573   int cu_index;
 574 };
 575
 576 /* A helper function that writes a single signatured_type to an
 577    obstack.  */
 578
 579 static int
 580 write_one_signatured_type (void **slot, void *d)
 581 {
 582   struct signatured_type_index_data *info
 583     = (struct signatured_type_index_data *) d;
 584   struct signatured_type *entry = (struct signatured_type *) *slot;
 585   struct partial_symtab *psymtab = entry->per_cu.v.psymtab;
 586
 587   write_psymbols (info->symtab,
 588                   info->psyms_seen,
 589                   (info->objfile->partial_symtabs->global_psymbols.data ()
 590                    + psymtab->globals_offset),
 591                   psymtab->n_global_syms, info->cu_index,
 592                   0);
 593   write_psymbols (info->symtab,
 594                   info->psyms_seen,
 595                   (info->objfile->partial_symtabs->static_psymbols.data ()
 596                    + psymtab->statics_offset),
 597                   psymtab->n_static_syms, info->cu_index,
 598                   1);
 599
 600   info->types_list.append_uint (8, BFD_ENDIAN_LITTLE,
 601                                 to_underlying (entry->per_cu.sect_off));
 602   info->types_list.append_uint (8, BFD_ENDIAN_LITTLE,
 603                                 to_underlying (entry->type_offset_in_tu));
 604   info->types_list.append_uint (8, BFD_ENDIAN_LITTLE, entry->signature);
 605
 606   ++info->cu_index;
 607
 608   return 1;
 609 }
 610
 611 /* Recurse into all "included" dependencies and count their symbols as
 612    if they appeared in this psymtab.  */
 613
 614 static void
 615 recursively_count_psymbols (struct partial_symtab *psymtab,
 616                             size_t &psyms_seen)
 617 {
 618   for (int i = 0; i < psymtab->number_of_dependencies; ++i)
 619     if (psymtab->dependencies[i]->user != NULL)
 620       recursively_count_psymbols (psymtab->dependencies[i],
 621                                   psyms_seen);
 622
 623   psyms_seen += psymtab->n_global_syms;
 624   psyms_seen += psymtab->n_static_syms;
 625 }
 626
 627 /* Recurse into all "included" dependencies and write their symbols as
 628    if they appeared in this psymtab.  */
 629
 630 static void
 631 recursively_write_psymbols (struct objfile *objfile,
 632                             struct partial_symtab *psymtab,
 633                             struct mapped_symtab *symtab,
 634                             std::unordered_set<partial_symbol *> &psyms_seen,
 635                             offset_type cu_index)
 636 {
 637   int i;
 638
 639   for (i = 0; i < psymtab->number_of_dependencies; ++i)
 640     if (psymtab->dependencies[i]->user != NULL)
 641       recursively_write_psymbols (objfile, psymtab->dependencies[i],
 642                                   symtab, psyms_seen, cu_index);
 643
 644   write_psymbols (symtab,
 645                   psyms_seen,
 646                   (objfile->partial_symtabs->global_psymbols.data ()
 647                    + psymtab->globals_offset),
 648                   psymtab->n_global_syms, cu_index,
 649                   0);
 650   write_psymbols (symtab,
 651                   psyms_seen,
 652                   (objfile->partial_symtabs->static_psymbols.data ()
 653                    + psymtab->statics_offset),
 654                   psymtab->n_static_syms, cu_index,
 655                   1);
 656 }
 657
 658 /* DWARF-5 .debug_names builder.  */
 659 class debug_names
 660 {
 661 public:
  /* Set up an empty builder.  DWARF5_BYTE_ORDER is remembered in
     m_dwarf5_byte_order and also given to both offset-size variants,
     m_dwarf32 and m_dwarf64.  IS_DWARF64 decides which of the two
     m_dwarf refers to; the two name-table references are then bound
     to that variant's tables.  DWARF2_PER_OBJFILE is passed on to
     m_debugstrlookup.  */
  debug_names (struct dwarf2_per_objfile *dwarf2_per_objfile, bool is_dwarf64,
               bfd_endian dwarf5_byte_order)
    : m_dwarf5_byte_order (dwarf5_byte_order),
      m_dwarf32 (dwarf5_byte_order),
      m_dwarf64 (dwarf5_byte_order),
      m_dwarf (is_dwarf64
               ? static_cast<dwarf &> (m_dwarf64)
               : static_cast<dwarf &> (m_dwarf32)),
      m_name_table_string_offs (m_dwarf.name_table_string_offs),
      m_name_table_entry_offs (m_dwarf.name_table_entry_offs),
      m_debugstrlookup (dwarf2_per_objfile)
  {}
 674
 675   int dwarf5_offset_size () const
 676   {
 677     const bool dwarf5_is_dwarf64 = &m_dwarf == &m_dwarf64;
 678     return dwarf5_is_dwarf64 ? 8 : 4;
 679   }
 680
  /* Is this symbol from DW_TAG_compile_unit or DW_TAG_type_unit?
     The build method uses this to pick between DW_IDX_compile_unit
     (cu) and DW_IDX_type_unit (tu) when it writes an abbreviation.  */
  enum class unit_kind { cu, tu };
 683
 684   /* Insert one symbol.  */
 685   void insert (const partial_symbol *psym, int cu_index, bool is_static,
 686                unit_kind kind)
 687   {
 688     const int dwarf_tag = psymbol_tag (psym);
 689     if (dwarf_tag == 0)
 690       return;
 691     const char *name = psym->ginfo.search_name ();
 692
 693     if (psym->ginfo.language () == language_ada)
 694       {
 695         /* We want to ensure that the Ada main function's name appears
 696            verbatim in the index.  However, this name will be of the
 697            form "_ada_mumble", and will be rewritten by ada_decode.
 698            So, recognize it specially here and add it to the index by
 699            hand.  */
 700         if (strcmp (main_name (), name) == 0)
 701           {
 702             const auto insertpair
 703               = m_name_to_value_set.emplace (c_str_view (name),
 704                                              std::set<symbol_value> ());
 705             std::set<symbol_value> &value_set = insertpair.first->second;
 706             value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static,
 707                                              kind));
 708           }
 709
 710         /* In order for the index to work when read back into gdb, it
 711            has to supply a funny form of the name: it should be the
 712            encoded name, with any suffixes stripped.  Using the
 713            ordinary encoded name will not work properly with the
 714            searching logic in find_name_components_bounds; nor will
 715            using the decoded name.  Furthermore, an Ada "verbatim"
 716            name (of the form "<MumBle>") must be entered without the
 717            angle brackets.  Note that the current index is unusual,
 718            see PR symtab/24820 for details.  */
 719         std::string decoded = ada_decode (name);
 720         if (decoded[0] == '<')
 721           name = (char *) obstack_copy0 (&m_string_obstack,
 722                                          decoded.c_str () + 1,
 723                                          decoded.length () - 2);
 724         else
 725           name = obstack_strdup (&m_string_obstack,
 726                                  ada_encode (decoded.c_str ()));
 727       }
 728
 729     const auto insertpair
 730       = m_name_to_value_set.emplace (c_str_view (name),
 731                                      std::set<symbol_value> ());
 732     std::set<symbol_value> &value_set = insertpair.first->second;
 733     value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static, kind));
 734   }
 735
  /* Build all the tables.  All symbols must be already inserted.
     This function does not call file_write, caller has to do it
     afterwards.

     The method fills the bucket table, the hash table, the two name
     tables (string offsets and entry offsets), the abbreviation
     table and the entry pool.  It may be called only once; an empty
     abbreviation table is what marks "not built yet".  */
  void build ()
  {
    /* Verify the build method has not been called twice.  */
    gdb_assert (m_abbrev_table.empty ());
    const size_t name_count = m_name_to_value_set.size ();
    /* The bucket count is 2 raised to the ceiling of
       log2 (name_count * 4 / 3); the product and division are done in
       integer arithmetic, the rest in floating point.  */
    m_bucket_table.resize
      (std::pow (2, std::ceil (std::log2 (name_count * 4 / 3))));
    m_hash_table.reserve (name_count);
    m_name_table_string_offs.reserve (name_count);
    m_name_table_entry_offs.reserve (name_count);

    /* Map each hash of symbol to its name and value.  */
    struct hash_it_pair
    {
      uint32_t hash;
      decltype (m_name_to_value_set)::const_iterator it;
    };
    /* First pass: distribute the names over the buckets, bucket
       number being hash modulo bucket count.  Each name is pushed at
       the front of its bucket's list.  */
    std::vector<std::forward_list<hash_it_pair>> bucket_hash;
    bucket_hash.resize (m_bucket_table.size ());
    for (decltype (m_name_to_value_set)::const_iterator it
           = m_name_to_value_set.cbegin ();
         it != m_name_to_value_set.cend ();
         ++it)
      {
        const char *const name = it->first.c_str ();
        const uint32_t hash = dwarf5_djb_hash (name);
        hash_it_pair hashitpair;
        hashitpair.hash = hash;
        hashitpair.it = it;
        auto &slot = bucket_hash[hash % bucket_hash.size()];
        slot.push_front (std::move (hashitpair));
      }
    /* Second pass: emit the buckets in order, so that the names of one
       bucket end up next to each other in the hash and name tables.  */
    for (size_t bucket_ix = 0; bucket_ix < bucket_hash.size (); ++bucket_ix)
      {
        const std::forward_list<hash_it_pair> &hashitlist
          = bucket_hash[bucket_ix];
        /* The slot of an empty bucket is not written here.  */
        if (hashitlist.empty ())
          continue;
        uint32_t &bucket_slot = m_bucket_table[bucket_ix];
        /* The hashes array is indexed starting at 1.  The bucket
           records the position of its first hash.  */
        store_unsigned_integer (reinterpret_cast<gdb_byte *> (&bucket_slot),
                                sizeof (bucket_slot), m_dwarf5_byte_order,
                                m_hash_table.size () + 1);
        for (const hash_it_pair &hashitpair : hashitlist)
          {
            /* Append a placeholder, then overwrite it with the hash
               encoded in the requested byte order.  */
            m_hash_table.push_back (0);
            store_unsigned_integer (reinterpret_cast<gdb_byte *>
                                                        (&m_hash_table.back ()),
                                    sizeof (m_hash_table.back ()),
                                    m_dwarf5_byte_order, hashitpair.hash);
            const c_str_view &name = hashitpair.it->first;
            const std::set<symbol_value> &value_set = hashitpair.it->second;
            /* The name table gets the offset m_debugstrlookup reports
               for the name, and the offset in the entry pool at which
               this name's entries start.  */
            m_name_table_string_offs.push_back_reorder
              (m_debugstrlookup.lookup (name.c_str ()));
            m_name_table_entry_offs.push_back_reorder (m_entry_pool.size ());
            gdb_assert (!value_set.empty ());
            for (const symbol_value &value : value_set)
              {
                /* One abbreviation exists per distinct (tag, is_static,
                   kind) combination.  An IDX of zero is taken to mean
                   that the combination has no abbreviation yet --
                   presumably m_idx_next never hands out zero; confirm
                   against its definition.  */
                int &idx = m_indexkey_to_idx[index_key (value.dwarf_tag,
                                                        value.is_static,
                                                        value.kind)];
                if (idx == 0)
                  {
                    idx = m_idx_next++;
                    /* Abbreviation: code, tag, then two
                       (attribute, form) pairs.  */
                    m_abbrev_table.append_unsigned_leb128 (idx);
                    m_abbrev_table.append_unsigned_leb128 (value.dwarf_tag);
                    m_abbrev_table.append_unsigned_leb128
                              (value.kind == unit_kind::cu ? DW_IDX_compile_unit
                                                           : DW_IDX_type_unit);
                    m_abbrev_table.append_unsigned_leb128 (DW_FORM_udata);
                    m_abbrev_table.append_unsigned_leb128 (value.is_static
                                                           ? DW_IDX_GNU_internal
                                                           : DW_IDX_GNU_external);
                    m_abbrev_table.append_unsigned_leb128 (DW_FORM_flag_present);

                    /* Terminate attributes list.  */
                    m_abbrev_table.append_unsigned_leb128 (0);
                    m_abbrev_table.append_unsigned_leb128 (0);
                  }

                /* The entry itself: abbreviation code, then the unit
                   index.  */
                m_entry_pool.append_unsigned_leb128 (idx);
                m_entry_pool.append_unsigned_leb128 (value.cu_index);
              }

            /* Terminate the list of CUs.  */
            m_entry_pool.append_unsigned_leb128 (0);
          }
      }
    /* Every name must have been emitted exactly once.  */
    gdb_assert (m_hash_table.size () == name_count);

    /* Terminate tags list.  This also leaves the abbreviation table
       non-empty, which is how other methods tell that build has
       run.  */
    m_abbrev_table.append_unsigned_leb128 (0);
  }
 832
 833   /* Return .debug_names bucket count.  This must be called only after
 834      calling the build method.  */
 835   uint32_t bucket_count () const
 836   {
 837     /* Verify the build method has been already called.  */
 838     gdb_assert (!m_abbrev_table.empty ());
 839     const uint32_t retval = m_bucket_table.size ();
 840
 841     /* Check for overflow.  */
 842     gdb_assert (retval == m_bucket_table.size ());
 843     return retval;
 844   }
 845
 846   /* Return .debug_names names count.  This must be called only after
 847      calling the build method.  */
 848   uint32_t name_count () const
 849   {
 850     /* Verify the build method has been already called.  */
 851     gdb_assert (!m_abbrev_table.empty ());
 852     const uint32_t retval = m_hash_table.size ();
 853
 854     /* Check for overflow.  */
 855     gdb_assert (retval == m_hash_table.size ());
 856     return retval;
 857   }
 858
 859   /* Return number of bytes of .debug_names abbreviation table.  This
 860      must be called only after calling the build method.  */
 861   uint32_t abbrev_table_bytes () const
 862   {
 863     gdb_assert (!m_abbrev_table.empty ());
 864     return m_abbrev_table.size ();
 865   }
 866
 867   /* Recurse into all "included" dependencies and store their symbols
 868      as if they appeared in this psymtab.  */
 869   void recursively_write_psymbols
 870     (struct objfile *objfile,
 871      struct partial_symtab *psymtab,
 872      std::unordered_set<partial_symbol *> &psyms_seen,
 873      int cu_index)
 874   {
 875     for (int i = 0; i < psymtab->number_of_dependencies; ++i)
 876       if (psymtab->dependencies[i]->user != NULL)
 877         recursively_write_psymbols (objfile, psymtab->dependencies[i],
 878                                     psyms_seen, cu_index);
 879
 880     write_psymbols (psyms_seen,
 881                     (objfile->partial_symtabs->global_psymbols.data ()
 882                      + psymtab->globals_offset),
 883                     psymtab->n_global_syms, cu_index, false, unit_kind::cu);
 884     write_psymbols (psyms_seen,
 885                     (objfile->partial_symtabs->static_psymbols.data ()
 886                      + psymtab->statics_offset),
 887                     psymtab->n_static_syms, cu_index, true, unit_kind::cu);
 888   }
 889
 890   /* Return number of bytes the .debug_names section will have.  This
 891      must be called only after calling the build method.  */
 892   size_t bytes () const
 893   {
 894     /* Verify the build method has been already called.  */
 895     gdb_assert (!m_abbrev_table.empty ());
 896     size_t expected_bytes = 0;
 897     expected_bytes += m_bucket_table.size () * sizeof (m_bucket_table[0]);
 898     expected_bytes += m_hash_table.size () * sizeof (m_hash_table[0]);
 899     expected_bytes += m_name_table_string_offs.bytes ();
 900     expected_bytes += m_name_table_entry_offs.bytes ();
 901     expected_bytes += m_abbrev_table.size ();
 902     expected_bytes += m_entry_pool.size ();
 903     return expected_bytes;
 904   }
 905
 906   /* Write .debug_names to FILE_NAMES and .debug_str addition to
 907      FILE_STR.  This must be called only after calling the build
 908      method.  */
 909   void file_write (FILE *file_names, FILE *file_str) const
 910   {
 911     /* Verify the build method has been already called.  */
 912     gdb_assert (!m_abbrev_table.empty ());
 913     ::file_write (file_names, m_bucket_table);
 914     ::file_write (file_names, m_hash_table);
 915     m_name_table_string_offs.file_write (file_names);
 916     m_name_table_entry_offs.file_write (file_names);
 917     m_abbrev_table.file_write (file_names);
 918     m_entry_pool.file_write (file_names);
 919     m_debugstrlookup.file_write (file_str);
 920   }
 921
 922   /* A helper user data for write_one_signatured_type.  */
 923   class write_one_signatured_type_data
 924   {
 925   public:
 926     write_one_signatured_type_data (debug_names &nametable_,
 927                                     signatured_type_index_data &&info_)
 928     : nametable (nametable_), info (std::move (info_))
 929     {}
 930     debug_names &nametable;
 931     struct signatured_type_index_data info;
 932   };
 933
 934   /* A helper function to pass write_one_signatured_type to
 935      htab_traverse_noresize.  */
 936   static int
 937   write_one_signatured_type (void **slot, void *d)
 938   {
 939     write_one_signatured_type_data *data = (write_one_signatured_type_data *) d;
 940     struct signatured_type_index_data *info = &data->info;
 941     struct signatured_type *entry = (struct signatured_type *) *slot;
 942
 943     data->nametable.write_one_signatured_type (entry, info);
 944
 945     return 1;
 946   }
 947
 948 private:
 949
 950   /* Storage for symbol names mapping them to their .debug_str section
 951      offsets.  */
 952   class debug_str_lookup
 953   {
 954   public:
 955
 956     /* Object constructor to be called for current DWARF2_PER_OBJFILE.
 957        All .debug_str section strings are automatically stored.  */
 958     debug_str_lookup (struct dwarf2_per_objfile *dwarf2_per_objfile)
 959       : m_abfd (dwarf2_per_objfile->objfile->obfd),
 960         m_dwarf2_per_objfile (dwarf2_per_objfile)
 961     {
 962       dwarf2_read_section (dwarf2_per_objfile->objfile,
 963                            &dwarf2_per_objfile->str);
 964       if (dwarf2_per_objfile->str.buffer == NULL)
 965         return;
 966       for (const gdb_byte *data = dwarf2_per_objfile->str.buffer;
 967            data < (dwarf2_per_objfile->str.buffer
 968                    + dwarf2_per_objfile->str.size);)
 969         {
 970           const char *const s = reinterpret_cast<const char *> (data);
 971           const auto insertpair
 972             = m_str_table.emplace (c_str_view (s),
 973                                    data - dwarf2_per_objfile->str.buffer);
 974           if (!insertpair.second)
 975             complaint (_("Duplicate string \"%s\" in "
 976                          ".debug_str section [in module %s]"),
 977                        s, bfd_get_filename (m_abfd));
 978           data += strlen (s) + 1;
 979         }
 980     }
 981
 982     /* Return offset of symbol name S in the .debug_str section.  Add
 983        such symbol to the section's end if it does not exist there
 984        yet.  */
 985     size_t lookup (const char *s)
 986     {
 987       const auto it = m_str_table.find (c_str_view (s));
 988       if (it != m_str_table.end ())
 989         return it->second;
 990       const size_t offset = (m_dwarf2_per_objfile->str.size
 991                              + m_str_add_buf.size ());
 992       m_str_table.emplace (c_str_view (s), offset);
 993       m_str_add_buf.append_cstr0 (s);
 994       return offset;
 995     }
 996
 997     /* Append the end of the .debug_str section to FILE.  */
 998     void file_write (FILE *file) const
 999     {
1000       m_str_add_buf.file_write (file);
1001     }
1002
1003   private:
1004     std::unordered_map<c_str_view, size_t, c_str_view_hasher> m_str_table;
1005     bfd *const m_abfd;
1006     struct dwarf2_per_objfile *m_dwarf2_per_objfile;
1007
1008     /* Data to add at the end of .debug_str for new needed symbol names.  */
1009     data_buf m_str_add_buf;
1010   };
1011
1012   /* Container to map used DWARF tags to their .debug_names abbreviation
1013      tags.  */
1014   class index_key
1015   {
1016   public:
1017     index_key (int dwarf_tag_, bool is_static_, unit_kind kind_)
1018       : dwarf_tag (dwarf_tag_), is_static (is_static_), kind (kind_)
1019     {
1020     }
1021
1022     bool
1023     operator== (const index_key &other) const
1024     {
1025       return (dwarf_tag == other.dwarf_tag && is_static == other.is_static
1026               && kind == other.kind);
1027     }
1028
1029     const int dwarf_tag;
1030     const bool is_static;
1031     const unit_kind kind;
1032   };
1033
1034   /* Provide std::unordered_map::hasher for index_key.  */
1035   class index_key_hasher
1036   {
1037   public:
1038     size_t
1039     operator () (const index_key &key) const
1040     {
1041       return (std::hash<int>() (key.dwarf_tag) << 1) | key.is_static;
1042     }
1043   };
1044
1045   /* Parameters of one symbol entry.  */
1046   class symbol_value
1047   {
1048   public:
1049     const int dwarf_tag, cu_index;
1050     const bool is_static;
1051     const unit_kind kind;
1052
1053     symbol_value (int dwarf_tag_, int cu_index_, bool is_static_,
1054                   unit_kind kind_)
1055       : dwarf_tag (dwarf_tag_), cu_index (cu_index_), is_static (is_static_),
1056         kind (kind_)
1057     {}
1058
1059     bool
1060     operator< (const symbol_value &other) const
1061     {
1062 #define X(n) \
1063   do \
1064     { \
1065       if (n < other.n) \
1066         return true; \
1067       if (n > other.n) \
1068         return false; \
1069     } \
1070   while (0)
1071       X (dwarf_tag);
1072       X (is_static);
1073       X (kind);
1074       X (cu_index);
1075 #undef X
1076       return false;
1077     }
1078   };
1079
1080   /* Abstract base class to unify DWARF-32 and DWARF-64 name table
1081      output.  */
1082   class offset_vec
1083   {
1084   protected:
1085     const bfd_endian dwarf5_byte_order;
1086   public:
1087     explicit offset_vec (bfd_endian dwarf5_byte_order_)
1088       : dwarf5_byte_order (dwarf5_byte_order_)
1089     {}
1090
1091     /* Call std::vector::reserve for NELEM elements.  */
1092     virtual void reserve (size_t nelem) = 0;
1093
1094     /* Call std::vector::push_back with store_unsigned_integer byte
1095        reordering for ELEM.  */
1096     virtual void push_back_reorder (size_t elem) = 0;
1097
1098     /* Return expected output size in bytes.  */
1099     virtual size_t bytes () const = 0;
1100
1101     /* Write name table to FILE.  */
1102     virtual void file_write (FILE *file) const = 0;
1103   };
1104
1105   /* Template to unify DWARF-32 and DWARF-64 output.  */
1106   template<typename OffsetSize>
1107   class offset_vec_tmpl : public offset_vec
1108   {
1109   public:
1110     explicit offset_vec_tmpl (bfd_endian dwarf5_byte_order_)
1111       : offset_vec (dwarf5_byte_order_)
1112     {}
1113
1114     /* Implement offset_vec::reserve.  */
1115     void reserve (size_t nelem) override
1116     {
1117       m_vec.reserve (nelem);
1118     }
1119
1120     /* Implement offset_vec::push_back_reorder.  */
1121     void push_back_reorder (size_t elem) override
1122     {
1123       m_vec.push_back (elem);
1124       /* Check for overflow.  */
1125       gdb_assert (m_vec.back () == elem);
1126       store_unsigned_integer (reinterpret_cast<gdb_byte *> (&m_vec.back ()),
1127                               sizeof (m_vec.back ()), dwarf5_byte_order, elem);
1128     }
1129
1130     /* Implement offset_vec::bytes.  */
1131     size_t bytes () const override
1132     {
1133       return m_vec.size () * sizeof (m_vec[0]);
1134     }
1135
1136     /* Implement offset_vec::file_write.  */
1137     void file_write (FILE *file) const override
1138     {
1139       ::file_write (file, m_vec);
1140     }
1141
1142   private:
1143     std::vector<OffsetSize> m_vec;
1144   };
1145
1146   /* Base class to unify DWARF-32 and DWARF-64 .debug_names output
1147      respecting name table width.  */
1148   class dwarf
1149   {
1150   public:
1151     offset_vec &name_table_string_offs, &name_table_entry_offs;
1152
1153     dwarf (offset_vec &name_table_string_offs_,
1154            offset_vec &name_table_entry_offs_)
1155       : name_table_string_offs (name_table_string_offs_),
1156         name_table_entry_offs (name_table_entry_offs_)
1157     {
1158     }
1159   };
1160
1161   /* Template to unify DWARF-32 and DWARF-64 .debug_names output
1162      respecting name table width.  */
1163   template<typename OffsetSize>
1164   class dwarf_tmpl : public dwarf
1165   {
1166   public:
1167     explicit dwarf_tmpl (bfd_endian dwarf5_byte_order_)
1168       : dwarf (m_name_table_string_offs, m_name_table_entry_offs),
1169         m_name_table_string_offs (dwarf5_byte_order_),
1170         m_name_table_entry_offs (dwarf5_byte_order_)
1171     {}
1172
1173   private:
1174     offset_vec_tmpl<OffsetSize> m_name_table_string_offs;
1175     offset_vec_tmpl<OffsetSize> m_name_table_entry_offs;
1176   };
1177
1178   /* Try to reconstruct original DWARF tag for given partial_symbol.
1179      This function is not DWARF-5 compliant but it is sufficient for
1180      GDB as a DWARF-5 index consumer.  */
1181   static int psymbol_tag (const struct partial_symbol *psym)
1182   {
1183     domain_enum domain = psym->domain;
1184     enum address_class aclass = psym->aclass;
1185
1186     switch (domain)
1187       {
1188       case VAR_DOMAIN:
1189         switch (aclass)
1190           {
1191           case LOC_BLOCK:
1192             return DW_TAG_subprogram;
1193           case LOC_TYPEDEF:
1194             return DW_TAG_typedef;
1195           case LOC_COMPUTED:
1196           case LOC_CONST_BYTES:
1197           case LOC_OPTIMIZED_OUT:
1198           case LOC_STATIC:
1199             return DW_TAG_variable;
1200           case LOC_CONST:
1201             /* Note: It's currently impossible to recognize psyms as enum values
1202                short of reading the type info.  For now punt.  */
1203             return DW_TAG_variable;
1204           default:
1205             /* There are other LOC_FOO values that one might want to classify
1206                as variables, but dwarf2read.c doesn't currently use them.  */
1207             return DW_TAG_variable;
1208           }
1209       case STRUCT_DOMAIN:
1210         return DW_TAG_structure_type;
1211       default:
1212         return 0;
1213       }
1214   }
1215
1216   /* Call insert for all partial symbols and mark them in PSYMS_SEEN.  */
1217   void write_psymbols (std::unordered_set<partial_symbol *> &psyms_seen,
1218                        struct partial_symbol **psymp, int count, int cu_index,
1219                        bool is_static, unit_kind kind)
1220   {
1221     for (; count-- > 0; ++psymp)
1222       {
1223         struct partial_symbol *psym = *psymp;
1224
1225         /* Only add a given psymbol once.  */
1226         if (psyms_seen.insert (psym).second)
1227           insert (psym, cu_index, is_static, kind);
1228       }
1229   }
1230
1231   /* A helper function that writes a single signatured_type
1232      to a debug_names.  */
1233   void
1234   write_one_signatured_type (struct signatured_type *entry,
1235                              struct signatured_type_index_data *info)
1236   {
1237     struct partial_symtab *psymtab = entry->per_cu.v.psymtab;
1238
1239     write_psymbols (info->psyms_seen,
1240                     (info->objfile->partial_symtabs->global_psymbols.data ()
1241                      + psymtab->globals_offset),
1242                     psymtab->n_global_syms, info->cu_index, false,
1243                     unit_kind::tu);
1244     write_psymbols (info->psyms_seen,
1245                     (info->objfile->partial_symtabs->static_psymbols.data ()
1246                      + psymtab->statics_offset),
1247                     psymtab->n_static_syms, info->cu_index, true,
1248                     unit_kind::tu);
1249
1250     info->types_list.append_uint (dwarf5_offset_size (), m_dwarf5_byte_order,
1251                                   to_underlying (entry->per_cu.sect_off));
1252
1253     ++info->cu_index;
1254   }
1255
1256   /* Store value of each symbol.  */
1257   std::unordered_map<c_str_view, std::set<symbol_value>, c_str_view_hasher>
1258     m_name_to_value_set;
1259
1260   /* Tables of DWARF-5 .debug_names.  They are in object file byte
1261      order.  */
1262   std::vector<uint32_t> m_bucket_table;
1263   std::vector<uint32_t> m_hash_table;
1264
1265   const bfd_endian m_dwarf5_byte_order;
1266   dwarf_tmpl<uint32_t> m_dwarf32;
1267   dwarf_tmpl<uint64_t> m_dwarf64;
1268   dwarf &m_dwarf;
1269   offset_vec &m_name_table_string_offs, &m_name_table_entry_offs;
1270   debug_str_lookup m_debugstrlookup;
1271
1272   /* Map each used .debug_names abbreviation tag parameter to its
1273      index value.  */
1274   std::unordered_map<index_key, int, index_key_hasher> m_indexkey_to_idx;
1275
1276   /* Next unused .debug_names abbreviation tag for
1277      m_indexkey_to_idx.  */
1278   int m_idx_next = 1;
1279
1280   /* .debug_names abbreviation table.  */
1281   data_buf m_abbrev_table;
1282
1283   /* .debug_names entry pool.  */
1284   data_buf m_entry_pool;
1285
1286   /* Temporary storage for Ada names.  */
1287   auto_obstack m_string_obstack;
1288 };
1289
1290 /* Return iff any of the needed offsets does not fit into 32-bit
1291    .debug_names section.  */
1292
1293 static bool
1294 check_dwarf64_offsets (struct dwarf2_per_objfile *dwarf2_per_objfile)
1295 {
1296   for (dwarf2_per_cu_data *per_cu : dwarf2_per_objfile->all_comp_units)
1297     {
1298       if (to_underlying (per_cu->sect_off) >= (static_cast<uint64_t> (1) << 32))
1299         return true;
1300     }
1301   for (const signatured_type *sigtype : dwarf2_per_objfile->all_type_units)
1302     {
1303       const dwarf2_per_cu_data &per_cu = sigtype->per_cu;
1304
1305       if (to_underlying (per_cu.sect_off) >= (static_cast<uint64_t> (1) << 32))
1306         return true;
1307     }
1308   return false;
1309 }
1310
1311 /* The psyms_seen set is potentially going to be largish (~40k
1312    elements when indexing a -g3 build of GDB itself).  Estimate the
1313    number of elements in order to avoid too many rehashes, which
1314    require rebuilding buckets and thus many trips to
1315    malloc/free.  */
1316
1317 static size_t
1318 psyms_seen_size (struct dwarf2_per_objfile *dwarf2_per_objfile)
1319 {
1320   size_t psyms_count = 0;
1321   for (dwarf2_per_cu_data *per_cu : dwarf2_per_objfile->all_comp_units)
1322     {
1323       struct partial_symtab *psymtab = per_cu->v.psymtab;
1324
1325       if (psymtab != NULL && psymtab->user == NULL)
1326         recursively_count_psymbols (psymtab, psyms_count);
1327     }
1328   /* Generating an index for gdb itself shows a ratio of
1329      TOTAL_SEEN_SYMS/UNIQUE_SYMS or ~5.  4 seems like a good bet.  */
1330   return psyms_count / 4;
1331 }
1332
1333 /* Assert that FILE's size is EXPECTED_SIZE.  Assumes file's seek
1334    position is at the end of the file.  */
1335
1336 static void
1337 assert_file_size (FILE *file, size_t expected_size)
1338 {
1339   const auto file_size = ftell (file);
1340   if (file_size == -1)
1341     perror_with_name (("ftell"));
1342   gdb_assert (file_size == expected_size);
1343 }
1344
1345 /* Write a gdb index file to OUT_FILE from all the sections passed as
1346    arguments.  */
1347
1348 static void
1349 write_gdbindex_1 (FILE *out_file,
1350                   const data_buf &cu_list,
1351                   const data_buf &types_cu_list,
1352                   const data_buf &addr_vec,
1353                   const data_buf &symtab_vec,
1354                   const data_buf &constant_pool)
1355 {
1356   data_buf contents;
1357   const offset_type size_of_header = 6 * sizeof (offset_type);
1358   offset_type total_len = size_of_header;
1359
1360   /* The version number.  */
1361   contents.append_data (MAYBE_SWAP (8));
1362
1363   /* The offset of the CU list from the start of the file.  */
1364   contents.append_data (MAYBE_SWAP (total_len));
1365   total_len += cu_list.size ();
1366
1367   /* The offset of the types CU list from the start of the file.  */
1368   contents.append_data (MAYBE_SWAP (total_len));
1369   total_len += types_cu_list.size ();
1370
1371   /* The offset of the address table from the start of the file.  */
1372   contents.append_data (MAYBE_SWAP (total_len));
1373   total_len += addr_vec.size ();
1374
1375   /* The offset of the symbol table from the start of the file.  */
1376   contents.append_data (MAYBE_SWAP (total_len));
1377   total_len += symtab_vec.size ();
1378
1379   /* The offset of the constant pool from the start of the file.  */
1380   contents.append_data (MAYBE_SWAP (total_len));
1381   total_len += constant_pool.size ();
1382
1383   gdb_assert (contents.size () == size_of_header);
1384
1385   contents.file_write (out_file);
1386   cu_list.file_write (out_file);
1387   types_cu_list.file_write (out_file);
1388   addr_vec.file_write (out_file);
1389   symtab_vec.file_write (out_file);
1390   constant_pool.file_write (out_file);
1391
1392   assert_file_size (out_file, total_len);
1393 }
1394
1395 /* Write contents of a .gdb_index section for OBJFILE into OUT_FILE.
1396    If OBJFILE has an associated dwz file, write contents of a .gdb_index
1397    section for that dwz file into DWZ_OUT_FILE.  If OBJFILE does not have an
1398    associated dwz file, DWZ_OUT_FILE must be NULL.  */
1399
1400 static void
1401 write_gdbindex (struct dwarf2_per_objfile *dwarf2_per_objfile, FILE *out_file,
1402                 FILE *dwz_out_file)
1403 {
1404   struct objfile *objfile = dwarf2_per_objfile->objfile;
1405   mapped_symtab symtab;
1406   data_buf objfile_cu_list;
1407   data_buf dwz_cu_list;
1408
1409   /* While we're scanning CU's create a table that maps a psymtab pointer
1410      (which is what addrmap records) to its index (which is what is recorded
1411      in the index file).  This will later be needed to write the address
1412      table.  */
1413   psym_index_map cu_index_htab;
1414   cu_index_htab.reserve (dwarf2_per_objfile->all_comp_units.size ());
1415
1416   /* The CU list is already sorted, so we don't need to do additional
1417      work here.  Also, the debug_types entries do not appear in
1418      all_comp_units, but only in their own hash table.  */
1419
1420   std::unordered_set<partial_symbol *> psyms_seen
1421     (psyms_seen_size (dwarf2_per_objfile));
1422   for (int i = 0; i < dwarf2_per_objfile->all_comp_units.size (); ++i)
1423     {
1424       struct dwarf2_per_cu_data *per_cu
1425         = dwarf2_per_objfile->all_comp_units[i];
1426       struct partial_symtab *psymtab = per_cu->v.psymtab;
1427
1428       /* CU of a shared file from 'dwz -m' may be unused by this main file.
1429          It may be referenced from a local scope but in such case it does not
1430          need to be present in .gdb_index.  */
1431       if (psymtab == NULL)
1432         continue;
1433
1434       if (psymtab->user == NULL)
1435         recursively_write_psymbols (objfile, psymtab, &symtab,
1436                                     psyms_seen, i);
1437
1438       const auto insertpair = cu_index_htab.emplace (psymtab, i);
1439       gdb_assert (insertpair.second);
1440
1441       /* The all_comp_units list contains CUs read from the objfile as well as
1442          from the eventual dwz file.  We need to place the entry in the
1443          corresponding index.  */
1444       data_buf &cu_list = per_cu->is_dwz ? dwz_cu_list : objfile_cu_list;
1445       cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1446                            to_underlying (per_cu->sect_off));
1447       cu_list.append_uint (8, BFD_ENDIAN_LITTLE, per_cu->length);
1448     }
1449
1450   /* Dump the address map.  */
1451   data_buf addr_vec;
1452   write_address_map (objfile, addr_vec, cu_index_htab);
1453
1454   /* Write out the .debug_type entries, if any.  */
1455   data_buf types_cu_list;
1456   if (dwarf2_per_objfile->signatured_types)
1457     {
1458       signatured_type_index_data sig_data (types_cu_list,
1459                                            psyms_seen);
1460
1461       sig_data.objfile = objfile;
1462       sig_data.symtab = &symtab;
1463       sig_data.cu_index = dwarf2_per_objfile->all_comp_units.size ();
1464       htab_traverse_noresize (dwarf2_per_objfile->signatured_types,
1465                               write_one_signatured_type, &sig_data);
1466     }
1467
1468   /* Now that we've processed all symbols we can shrink their cu_indices
1469      lists.  */
1470   uniquify_cu_indices (&symtab);
1471
1472   data_buf symtab_vec, constant_pool;
1473   write_hash_table (&symtab, symtab_vec, constant_pool);
1474
1475   write_gdbindex_1(out_file, objfile_cu_list, types_cu_list, addr_vec,
1476                    symtab_vec, constant_pool);
1477
1478   if (dwz_out_file != NULL)
1479     write_gdbindex_1 (dwz_out_file, dwz_cu_list, {}, {}, {}, {});
1480   else
1481     gdb_assert (dwz_cu_list.empty ());
1482 }
1483
1484 /* DWARF-5 augmentation string for GDB's DW_IDX_GNU_* extension.  */
1485 static const gdb_byte dwarf5_gdb_augmentation[] = { 'G', 'D', 'B', 0 };
1486
1487 /* Write a new .debug_names section for OBJFILE into OUT_FILE, write
1488    needed addition to .debug_str section to OUT_FILE_STR.  Return how
1489    many bytes were expected to be written into OUT_FILE.  */
1490
1491 static void
1492 write_debug_names (struct dwarf2_per_objfile *dwarf2_per_objfile,
1493                    FILE *out_file, FILE *out_file_str)
1494 {
1495   const bool dwarf5_is_dwarf64 = check_dwarf64_offsets (dwarf2_per_objfile);
1496   struct objfile *objfile = dwarf2_per_objfile->objfile;
1497   const enum bfd_endian dwarf5_byte_order
1498     = gdbarch_byte_order (get_objfile_arch (objfile));
1499
1500   /* The CU list is already sorted, so we don't need to do additional
1501      work here.  Also, the debug_types entries do not appear in
1502      all_comp_units, but only in their own hash table.  */
1503   data_buf cu_list;
1504   debug_names nametable (dwarf2_per_objfile, dwarf5_is_dwarf64,
1505                          dwarf5_byte_order);
1506   std::unordered_set<partial_symbol *>
1507     psyms_seen (psyms_seen_size (dwarf2_per_objfile));
1508   for (int i = 0; i < dwarf2_per_objfile->all_comp_units.size (); ++i)
1509     {
1510       const dwarf2_per_cu_data *per_cu = dwarf2_per_objfile->all_comp_units[i];
1511       partial_symtab *psymtab = per_cu->v.psymtab;
1512
1513       /* CU of a shared file from 'dwz -m' may be unused by this main
1514          file.  It may be referenced from a local scope but in such
1515          case it does not need to be present in .debug_names.  */
1516       if (psymtab == NULL)
1517         continue;
1518
1519       if (psymtab->user == NULL)
1520         nametable.recursively_write_psymbols (objfile, psymtab, psyms_seen, i);
1521
1522       cu_list.append_uint (nametable.dwarf5_offset_size (), dwarf5_byte_order,
1523                            to_underlying (per_cu->sect_off));
1524     }
1525
1526   /* Write out the .debug_type entries, if any.  */
1527   data_buf types_cu_list;
1528   if (dwarf2_per_objfile->signatured_types)
1529     {
1530       debug_names::write_one_signatured_type_data sig_data (nametable,
1531                         signatured_type_index_data (types_cu_list, psyms_seen));
1532
1533       sig_data.info.objfile = objfile;
1534       /* It is used only for gdb_index.  */
1535       sig_data.info.symtab = nullptr;
1536       sig_data.info.cu_index = 0;
1537       htab_traverse_noresize (dwarf2_per_objfile->signatured_types,
1538                               debug_names::write_one_signatured_type,
1539                               &sig_data);
1540     }
1541
1542   nametable.build ();
1543
1544   /* No addr_vec - DWARF-5 uses .debug_aranges generated by GCC.  */
1545
1546   const offset_type bytes_of_header
1547     = ((dwarf5_is_dwarf64 ? 12 : 4)
1548        + 2 + 2 + 7 * 4
1549        + sizeof (dwarf5_gdb_augmentation));
1550   size_t expected_bytes = 0;
1551   expected_bytes += bytes_of_header;
1552   expected_bytes += cu_list.size ();
1553   expected_bytes += types_cu_list.size ();
1554   expected_bytes += nametable.bytes ();
1555   data_buf header;
1556
1557   if (!dwarf5_is_dwarf64)
1558     {
1559       const uint64_t size64 = expected_bytes - 4;
1560       gdb_assert (size64 < 0xfffffff0);
1561       header.append_uint (4, dwarf5_byte_order, size64);
1562     }
1563   else
1564     {
1565       header.append_uint (4, dwarf5_byte_order, 0xffffffff);
1566       header.append_uint (8, dwarf5_byte_order, expected_bytes - 12);
1567     }
1568
1569   /* The version number.  */
1570   header.append_uint (2, dwarf5_byte_order, 5);
1571
1572   /* Padding.  */
1573   header.append_uint (2, dwarf5_byte_order, 0);
1574
1575   /* comp_unit_count - The number of CUs in the CU list.  */
1576   header.append_uint (4, dwarf5_byte_order,
1577                       dwarf2_per_objfile->all_comp_units.size ());
1578
1579   /* local_type_unit_count - The number of TUs in the local TU
1580      list.  */
1581   header.append_uint (4, dwarf5_byte_order,
1582                       dwarf2_per_objfile->all_type_units.size ());
1583
1584   /* foreign_type_unit_count - The number of TUs in the foreign TU
1585      list.  */
1586   header.append_uint (4, dwarf5_byte_order, 0);
1587
1588   /* bucket_count - The number of hash buckets in the hash lookup
1589      table.  */
1590   header.append_uint (4, dwarf5_byte_order, nametable.bucket_count ());
1591
1592   /* name_count - The number of unique names in the index.  */
1593   header.append_uint (4, dwarf5_byte_order, nametable.name_count ());
1594
1595   /* abbrev_table_size - The size in bytes of the abbreviations
1596      table.  */
1597   header.append_uint (4, dwarf5_byte_order, nametable.abbrev_table_bytes ());
1598
1599   /* augmentation_string_size - The size in bytes of the augmentation
1600      string.  This value is rounded up to a multiple of 4.  */
1601   static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, "");
1602   header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation));
1603   header.append_data (dwarf5_gdb_augmentation);
1604
1605   gdb_assert (header.size () == bytes_of_header);
1606
1607   header.file_write (out_file);
1608   cu_list.file_write (out_file);
1609   types_cu_list.file_write (out_file);
1610   nametable.file_write (out_file, out_file_str);
1611
1612   assert_file_size (out_file, expected_bytes);
1613 }
1614
1615 /* This represents an index file being written (work-in-progress).
1616
1617    The data is initially written to a temporary file.  When the finalize method
1618    is called, the file is closed and moved to its final location.
1619
1620    On failure (if this object is being destroyed with having called finalize),
1621    the temporary file is closed and deleted.  */
1622
1623 struct index_wip_file
1624 {
1625   index_wip_file (const char *dir, const char *basename,
1626                   const char *suffix)
1627   {
1628     filename = (std::string (dir) + SLASH_STRING + basename
1629                 + suffix);
1630
1631     filename_temp = make_temp_filename (filename);
1632
1633     scoped_fd out_file_fd (gdb_mkostemp_cloexec (filename_temp.data (),
1634                                                  O_BINARY));
1635     if (out_file_fd.get () == -1)
1636       perror_with_name (("mkstemp"));
1637
1638     out_file = out_file_fd.to_file ("wb");
1639
1640     if (out_file == nullptr)
1641       error (_("Can't open `%s' for writing"), filename_temp.data ());
1642
1643     unlink_file.emplace (filename_temp.data ());
1644   }
1645
1646   void finalize ()
1647   {
1648     /* We want to keep the file.  */
1649     unlink_file->keep ();
1650
1651     /* Close and move the str file in place.  */
1652     unlink_file.reset ();
1653     if (rename (filename_temp.data (), filename.c_str ()) != 0)
1654       perror_with_name (("rename"));
1655   }
1656
1657   std::string filename;
1658   gdb::char_vector filename_temp;
1659
1660   /* Order matters here; we want FILE to be closed before
1661      FILENAME_TEMP is unlinked, because on MS-Windows one cannot
1662      delete a file that is still open.  So, we wrap the unlinker in an
1663      optional and emplace it once we know the file name.  */
1664   gdb::optional<gdb::unlinker> unlink_file;
1665
1666   gdb_file_up out_file;
1667 };
1668
1669 /* See dwarf-index-write.h.  */
1670
1671 void
1672 write_psymtabs_to_index (struct dwarf2_per_objfile *dwarf2_per_objfile,
1673                          const char *dir, const char *basename,
1674                          const char *dwz_basename,
1675                          dw_index_kind index_kind)
1676 {
1677   struct objfile *objfile = dwarf2_per_objfile->objfile;
1678
1679   if (dwarf2_per_objfile->using_index)
1680     error (_("Cannot use an index to create the index"));
1681
1682   if (dwarf2_per_objfile->types.size () > 1)
1683     error (_("Cannot make an index when the file has multiple .debug_types sections"));
1684
1685   if (!objfile->partial_symtabs->psymtabs
1686       || !objfile->partial_symtabs->psymtabs_addrmap)
1687     return;
1688
1689   struct stat st;
1690   if (stat (objfile_name (objfile), &st) < 0)
1691     perror_with_name (objfile_name (objfile));
1692
1693   const char *index_suffix = (index_kind == dw_index_kind::DEBUG_NAMES
1694                               ? INDEX5_SUFFIX : INDEX4_SUFFIX);
1695
1696   index_wip_file objfile_index_wip (dir, basename, index_suffix);
1697   gdb::optional<index_wip_file> dwz_index_wip;
1698
1699   if (dwz_basename != NULL)
1700       dwz_index_wip.emplace (dir, dwz_basename, index_suffix);
1701
1702   if (index_kind == dw_index_kind::DEBUG_NAMES)
1703     {
1704       index_wip_file str_wip_file (dir, basename, DEBUG_STR_SUFFIX);
1705
1706       write_debug_names (dwarf2_per_objfile, objfile_index_wip.out_file.get (),
1707                          str_wip_file.out_file.get ());
1708
1709       str_wip_file.finalize ();
1710     }
1711   else
1712     write_gdbindex (dwarf2_per_objfile, objfile_index_wip.out_file.get (),
1713                     (dwz_index_wip.has_value ()
1714                      ? dwz_index_wip->out_file.get () : NULL));
1715
1716   objfile_index_wip.finalize ();
1717
1718   if (dwz_index_wip.has_value ())
1719     dwz_index_wip->finalize ();
1720 }
1721
1722 /* Implementation of the `save gdb-index' command.
1723
1724    Note that the .gdb_index file format used by this command is
1725    documented in the GDB manual.  Any changes here must be documented
1726    there.  */
1727
1728 static void
1729 save_gdb_index_command (const char *arg, int from_tty)
1730 {
1731   const char dwarf5space[] = "-dwarf-5 ";
1732   dw_index_kind index_kind = dw_index_kind::GDB_INDEX;
1733
1734   if (!arg)
1735     arg = "";
1736
1737   arg = skip_spaces (arg);
1738   if (strncmp (arg, dwarf5space, strlen (dwarf5space)) == 0)
1739     {
1740       index_kind = dw_index_kind::DEBUG_NAMES;
1741       arg += strlen (dwarf5space);
1742       arg = skip_spaces (arg);
1743     }
1744
1745   if (!*arg)
1746     error (_("usage: save gdb-index [-dwarf-5] DIRECTORY"));
1747
1748   for (objfile *objfile : current_program_space->objfiles ())
1749     {
1750       struct stat st;
1751
1752       /* If the objfile does not correspond to an actual file, skip it.  */
1753       if (stat (objfile_name (objfile), &st) < 0)
1754         continue;
1755
1756       struct dwarf2_per_objfile *dwarf2_per_objfile
1757         = get_dwarf2_per_objfile (objfile);
1758
1759       if (dwarf2_per_objfile != NULL)
1760         {
1761           try
1762             {
1763               const char *basename = lbasename (objfile_name (objfile));
1764               const dwz_file *dwz = dwarf2_get_dwz_file (dwarf2_per_objfile);
1765               const char *dwz_basename = NULL;
1766
1767               if (dwz != NULL)
1768                 dwz_basename = lbasename (dwz->filename ());
1769
1770               write_psymtabs_to_index (dwarf2_per_objfile, arg, basename,
1771                                        dwz_basename, index_kind);
1772             }
1773           catch (const gdb_exception_error &except)
1774             {
1775               exception_fprintf (gdb_stderr, except,
1776                                  _("Error while writing index for `%s': "),
1777                                  objfile_name (objfile));
1778             }
1779             }
1780
1781     }
1782 }
1783
1784 void
1785 _initialize_dwarf_index_write ()
1786 {
1787   cmd_list_element *c = add_cmd ("gdb-index", class_files,
1788                                  save_gdb_index_command, _("\
1789 Save a gdb-index file.\n\
1790 Usage: save gdb-index [-dwarf-5] DIRECTORY\n\
1791 \n\
1792 No options create one file with .gdb-index extension for pre-DWARF-5\n\
1793 compatible .gdb_index section.  With -dwarf-5 creates two files with\n\
1794 extension .debug_names and .debug_str for DWARF-5 .debug_names section."),
1795                &save_cmdlist);
1796   set_cmd_completer (c, filename_completer);
1797 }