gold/merge.h

   1 // merge.h -- handle section merging for gold  -*- C++ -*-
   2
   3 // Copyright 2006, 2007 Free Software Foundation, Inc.
   4 // Written by Ian Lance Taylor <iant@google.com>.
   5
   6 // This file is part of gold.
   7
   8 // This program is free software; you can redistribute it and/or modify
   9 // it under the terms of the GNU General Public License as published by
  10 // the Free Software Foundation; either version 3 of the License, or
  11 // (at your option) any later version.
  12
  13 // This program is distributed in the hope that it will be useful,
  14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 // GNU General Public License for more details.
  17
  18 // You should have received a copy of the GNU General Public License
  19 // along with this program; if not, write to the Free Software
  20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
  21 // MA 02110-1301, USA.
  22
  23 #ifndef GOLD_MERGE_H
  24 #define GOLD_MERGE_H
  25
  26 #include <climits>
  27 #include <map>
  28 #include <vector>
  29
  30 #include "stringpool.h"
  31 #include "output.h"
  32
  33 namespace gold
  34 {
  35
  36 class Merge_map;
  37
  38 // For each object with merge sections, we store an Object_merge_map.
  39 // This is used to map locations in input sections to a merged output
  40 // section.  The output section itself is not recorded here--it can be
  41 // found in the map_to_output_ field of the Object.
  42
  43 class Object_merge_map
  44 {
  45  public:
  46   Object_merge_map()
  47     : first_shnum_(-1U), first_map_(),
  48       second_shnum_(-1U), second_map_(),
  49       section_merge_maps_()
  50   { }
  51
  52   ~Object_merge_map();
  53
  54   // Add a mapping for MERGE_MAP, for the bytes from OFFSET to OFFSET
  55   // + LENGTH in the input section SHNDX to OUTPUT_OFFSET in the
  56   // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
  57   // discarded.  OUTPUT_OFFSET is relative to the start of the merged
  58   // data in the output section.
  59   void
  60   add_mapping(const Merge_map*, unsigned int shndx, section_offset_type offset,
  61               section_size_type length, section_offset_type output_offset);
  62
  63   // Get the output offset for an input address.  MERGE_MAP is the map
  64   // we are looking for, or NULL if we don't care.  The input address
  65   // is at offset OFFSET in section SHNDX.  This sets *OUTPUT_OFFSET
  66   // to the offset in the output section; this will be -1 if the bytes
  67   // are not being copied to the output.  This returns true if the
  68   // mapping is known, false otherwise.  *OUTPUT_OFFSET is relative to
  69   // the start of the merged data in the output section.
  70   bool
  71   get_output_offset(const Merge_map*, unsigned int shndx,
  72                     section_offset_type offset,
  73                     section_offset_type *output_offset);
  74
  75   // Return whether this is the merge map for section SHNDX.
  76   bool
  77   is_merge_section_for(const Merge_map*, unsigned int shndx);
  78
  79   // Initialize an mapping from input offsets to output addresses for
  80   // section SHNDX.  STARTING_ADDRESS is the output address of the
  81   // merged section.
  82   template<int size>
  83   void
  84   initialize_input_to_output_map(
  85       unsigned int shndx,
  86       typename elfcpp::Elf_types<size>::Elf_Addr starting_address,
  87       Unordered_map<section_offset_type,
  88                     typename elfcpp::Elf_types<size>::Elf_Addr>*);
  89
  90  private:
  91   // Map input section offsets to a length and an output section
  92   // offset.  An output section offset of -1 means that this part of
  93   // the input section is being discarded.
  94   struct Input_merge_entry
  95   {
  96     // The offset in the input section.
  97     section_offset_type input_offset;
  98     // The length.
  99     section_size_type length;
 100     // The offset in the output section.
 101     section_offset_type output_offset;
 102   };
 103
 104   // A less-than comparison routine for Input_merge_entry.
 105   struct Input_merge_compare
 106   {
 107     bool
 108     operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
 109     { return i1.input_offset < i2.input_offset; }
 110   };
 111
 112   // A list of entries for a particular input section.
 113   struct Input_merge_map
 114   {
 115     typedef std::vector<Input_merge_entry> Entries;
 116
 117     // We store these with the Relobj, and we look them up by input
 118     // section.  It is possible to have two different merge maps
 119     // associated with a single output section.  For example, this
 120     // happens routinely with .rodata, when merged string constants
 121     // and merged fixed size constants are both put into .rodata.  The
 122     // output offset that we store is not the offset from the start of
 123     // the output section; it is the offset from the start of the
 124     // merged data in the output section.  That means that the caller
 125     // is going to add the offset of the merged data within the output
 126     // section, which means that the caller needs to know which set of
 127     // merged data it found the entry in.  So it's not enough to find
 128     // this data based on the input section and the output section; we
 129     // also have to find it based on a set of merged data in the
 130     // output section.  In order to verify that we are looking at the
 131     // right data, we store a pointer to the Merge_map here, and we
 132     // pass in a pointer when looking at the data.  If we are asked to
 133     // look up information for a different Merge_map, we report that
 134     // we don't have it, rather than trying a lookup and returning an
 135     // answer which will receive the wrong offset.
 136     const Merge_map* merge_map;
 137     // The list of mappings.
 138     Entries entries;
 139     // Whether the ENTRIES field is sorted by input_offset.
 140     bool sorted;
 141
 142     Input_merge_map()
 143       : merge_map(NULL), entries(), sorted(true)
 144     { }
 145   };
 146
 147   // Map input section indices to merge maps.
 148   typedef std::map<unsigned int, Input_merge_map*> Section_merge_maps;
 149
 150   // Return a pointer to the Input_merge_map to use for the input
 151   // section SHNDX, or NULL.
 152   Input_merge_map*
 153   get_input_merge_map(unsigned int shndx);
 154
 155   // Get or make the the Input_merge_map to use for the section SHNDX
 156   // with MERGE_MAP.
 157   Input_merge_map*
 158   get_or_make_input_merge_map(const Merge_map* merge_map, unsigned int shndx);
 159
 160   // Any given object file will normally only have a couple of input
 161   // sections with mergeable contents.  So we keep the first two input
 162   // section numbers inline, and push any further ones into a map.  A
 163   // value of -1U in first_shnum_ or second_shnum_ means that we don't
 164   // have a corresponding entry.
 165   unsigned int first_shnum_;
 166   Input_merge_map first_map_;
 167   unsigned int second_shnum_;
 168   Input_merge_map second_map_;
 169   Section_merge_maps section_merge_maps_;
 170 };
 171
 172 // This class manages mappings from input sections to offsets in an
 173 // output section.  This is used where input sections are merged.  The
 174 // actual data is stored in fields in Object.
 175
 176 class Merge_map
 177 {
 178  public:
 179   Merge_map()
 180   { }
 181
 182   // Add a mapping for the bytes from OFFSET to OFFSET + LENGTH in the
 183   // input section SHNDX in object OBJECT to OUTPUT_OFFSET in the
 184   // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
 185   // discarded.  OUTPUT_OFFSET is not the offset from the start of the
 186   // output section, it is the offset from the start of the merged
 187   // data within the output section.
 188   void
 189   add_mapping(Relobj* object, unsigned int shndx,
 190               section_offset_type offset, section_size_type length,
 191               section_offset_type output_offset);
 192
 193   // Return the output offset for an input address.  The input address
 194   // is at offset OFFSET in section SHNDX in OBJECT.  This sets
 195   // *OUTPUT_OFFSET to the offset in the output section; this will be
 196   // -1 if the bytes are not being copied to the output.  This returns
 197   // true if the mapping is known, false otherwise.  This returns the
 198   // value stored by add_mapping, namely the offset from the start of
 199   // the merged data within the output section.
 200   bool
 201   get_output_offset(const Relobj* object, unsigned int shndx,
 202                     section_offset_type offset,
 203                     section_offset_type *output_offset) const;
 204
 205   // Return whether this is the merge mapping for section SHNDX in
 206   // OBJECT.  This should return true when get_output_offset would
 207   // return true for some input offset.
 208   bool
 209   is_merge_section_for(const Relobj* object, unsigned int shndx) const;
 210 };
 211
 212 // A general class for SHF_MERGE data, to hold functions shared by
 213 // fixed-size constant data and string data.
 214
 215 class Output_merge_base : public Output_section_data
 216 {
 217  public:
 218   Output_merge_base(uint64_t entsize, uint64_t addralign)
 219     : Output_section_data(addralign), merge_map_(), entsize_(entsize)
 220   { }
 221
 222  protected:
 223   // Return the output offset for an input offset.
 224   bool
 225   do_output_offset(const Relobj* object, unsigned int shndx,
 226                    section_offset_type offset,
 227                    section_offset_type* poutput) const;
 228
 229   // Return whether this is the merge section for an input section.
 230   bool
 231   do_is_merge_section_for(const Relobj*, unsigned int shndx) const;
 232
 233   // Return the entry size.
 234   uint64_t
 235   entsize() const
 236   { return this->entsize_; }
 237
 238   // Add a mapping from an OFFSET in input section SHNDX in object
 239   // OBJECT to an OUTPUT_OFFSET in the output section.  OUTPUT_OFFSET
 240   // is the offset from the start of the merged data in the output
 241   // section.
 242   void
 243   add_mapping(Relobj* object, unsigned int shndx, section_offset_type offset,
 244               section_size_type length, section_offset_type output_offset)
 245   {
 246     this->merge_map_.add_mapping(object, shndx, offset, length, output_offset);
 247   }
 248
 249  private:
 250   // A mapping from input object/section/offset to offset in output
 251   // section.
 252   Merge_map merge_map_;
 253   // The entry size.  For fixed-size constants, this is the size of
 254   // the constants.  For strings, this is the size of a character.
 255   uint64_t entsize_;
 256 };
 257
 258 // Handle SHF_MERGE sections with fixed-size constant data.
 259
 260 class Output_merge_data : public Output_merge_base
 261 {
 262  public:
 263   Output_merge_data(uint64_t entsize, uint64_t addralign)
 264     : Output_merge_base(entsize, addralign), p_(NULL), len_(0), alc_(0),
 265       input_count_(0),
 266       hashtable_(128, Merge_data_hash(this), Merge_data_eq(this))
 267   { }
 268
 269  protected:
 270   // Add an input section.
 271   bool
 272   do_add_input_section(Relobj* object, unsigned int shndx);
 273
 274   // Set the final data size.
 275   void
 276   set_final_data_size();
 277
 278   // Write the data to the file.
 279   void
 280   do_write(Output_file*);
 281
 282   // Write the data to a buffer.
 283   void
 284   do_write_to_buffer(unsigned char*);
 285
 286   // Print merge stats to stderr.
 287   void
 288   do_print_merge_stats(const char* section_name);
 289
 290  private:
 291   // We build a hash table of the fixed-size constants.  Each constant
 292   // is stored as a pointer into the section data we are accumulating.
 293
 294   // A key in the hash table.  This is an offset in the section
 295   // contents we are building.
 296   typedef section_offset_type Merge_data_key;
 297
 298   // Compute the hash code.  To do this we need a pointer back to the
 299   // object holding the data.
 300   class Merge_data_hash
 301   {
 302    public:
 303     Merge_data_hash(const Output_merge_data* pomd)
 304       : pomd_(pomd)
 305     { }
 306
 307     size_t
 308     operator()(Merge_data_key) const;
 309
 310    private:
 311     const Output_merge_data* pomd_;
 312   };
 313
 314   friend class Merge_data_hash;
 315
 316   // Compare two entries in the hash table for equality.  To do this
 317   // we need a pointer back to the object holding the data.  Note that
 318   // we now have a pointer to the object stored in two places in the
 319   // hash table.  Fixing this would require specializing the hash
 320   // table, which would be hard to do portably.
 321   class Merge_data_eq
 322   {
 323    public:
 324     Merge_data_eq(const Output_merge_data* pomd)
 325       : pomd_(pomd)
 326     { }
 327
 328     bool
 329     operator()(Merge_data_key k1, Merge_data_key k2) const;
 330
 331    private:
 332     const Output_merge_data* pomd_;
 333   };
 334
 335   friend class Merge_data_eq;
 336
 337   // The type of the hash table.
 338   typedef Unordered_set<Merge_data_key, Merge_data_hash, Merge_data_eq>
 339     Merge_data_hashtable;
 340
 341   // Given a hash table key, which is just an offset into the section
 342   // data, return a pointer to the corresponding constant.
 343   const unsigned char*
 344   constant(Merge_data_key k) const
 345   {
 346     gold_assert(k >= 0 && k < static_cast<section_offset_type>(this->len_));
 347     return this->p_ + k;
 348   }
 349
 350   // Add a constant to the output.
 351   void
 352   add_constant(const unsigned char*);
 353
 354   // The accumulated data.
 355   unsigned char* p_;
 356   // The length of the accumulated data.
 357   section_size_type len_;
 358   // The size of the allocated buffer.
 359   section_size_type alc_;
 360   // The number of entries seen in input files.
 361   size_t input_count_;
 362   // The hash table.
 363   Merge_data_hashtable hashtable_;
 364 };
 365
 366 // Handle SHF_MERGE sections with string data.  This is a template
 367 // based on the type of the characters in the string.
 368
 369 template<typename Char_type>
 370 class Output_merge_string : public Output_merge_base
 371 {
 372  public:
 373   Output_merge_string(uint64_t addralign)
 374     : Output_merge_base(sizeof(Char_type), addralign), stringpool_(),
 375       merged_strings_(), input_count_(0)
 376   {
 377     gold_assert(addralign <= sizeof(Char_type));
 378     this->stringpool_.set_no_zero_null();
 379   }
 380
 381  protected:
 382   // Add an input section.
 383   bool
 384   do_add_input_section(Relobj* object, unsigned int shndx);
 385
 386   // Do all the final processing after the input sections are read in.
 387   // Returns the final data size.
 388   section_size_type
 389   finalize_merged_data();
 390
 391   // Set the final data size.
 392   void
 393   set_final_data_size();
 394
 395   // Write the data to the file.
 396   void
 397   do_write(Output_file*);
 398
 399   // Write the data to a buffer.
 400   void
 401   do_write_to_buffer(unsigned char*);
 402
 403   // Print merge stats to stderr.
 404   void
 405   do_print_merge_stats(const char* section_name);
 406
 407   // Writes the stringpool to a buffer.
 408   void
 409   stringpool_to_buffer(unsigned char* buffer, section_size_type buffer_size)
 410   { this->stringpool_.write_to_buffer(buffer, buffer_size); }
 411
 412   // Clears all the data in the stringpool, to save on memory.
 413   void
 414   clear_stringpool()
 415   { this->stringpool_.clear(); }
 416
 417  private:
 418   // The name of the string type, for stats.
 419   const char*
 420   string_name();
 421
 422   // As we see input sections, we build a mapping from object, section
 423   // index and offset to strings.
 424   struct Merged_string
 425   {
 426     // The input object where the string was found.
 427     Relobj* object;
 428     // The input section in the input object.
 429     unsigned int shndx;
 430     // The offset in the input section.
 431     section_offset_type offset;
 432     // The string itself, a pointer into a Stringpool.
 433     const Char_type* string;
 434     // The length of the string in bytes, including the null terminator.
 435     size_t length;
 436     // The key in the Stringpool.
 437     Stringpool::Key stringpool_key;
 438
 439     Merged_string(Relobj *objecta, unsigned int shndxa,
 440                   section_offset_type offseta, const Char_type* stringa,
 441                   size_t lengtha, Stringpool::Key stringpool_keya)
 442       : object(objecta), shndx(shndxa), offset(offseta), string(stringa),
 443         length(lengtha), stringpool_key(stringpool_keya)
 444     { }
 445   };
 446
 447   typedef std::vector<Merged_string> Merged_strings;
 448
 449   // As we see the strings, we add them to a Stringpool.
 450   Stringpool_template<Char_type> stringpool_;
 451   // Map from a location in an input object to an entry in the
 452   // Stringpool.
 453   Merged_strings merged_strings_;
 454   // The number of entries seen in input files.
 455   size_t input_count_;
 456 };
 457
 458 } // End namespace gold.
 459
 460 #endif // !defined(GOLD_MERGE_H)