gold/script.cc

   1 // script.cc -- handle linker scripts for gold.
   2
   3 // Copyright 2006, 2007 Free Software Foundation, Inc.
   4 // Written by Ian Lance Taylor <iant@google.com>.
   5
   6 // This file is part of gold.
   7
   8 // This program is free software; you can redistribute it and/or modify
   9 // it under the terms of the GNU General Public License as published by
  10 // the Free Software Foundation; either version 3 of the License, or
  11 // (at your option) any later version.
  12
  13 // This program is distributed in the hope that it will be useful,
  14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 // GNU General Public License for more details.
  17
  18 // You should have received a copy of the GNU General Public License
  19 // along with this program; if not, write to the Free Software
  20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
  21 // MA 02110-1301, USA.
  22
  23 #include "gold.h"
  24
  25 #include <string>
  26 #include <vector>
  27 #include <cstdio>
  28 #include <cstdlib>
  29 #include "filenames.h"
  30
  31 #include "dirsearch.h"
  32 #include "options.h"
  33 #include "fileread.h"
  34 #include "workqueue.h"
  35 #include "readsyms.h"
  36 #include "parameters.h"
  37 #include "layout.h"
  38 #include "yyscript.h"
  39 #include "script.h"
  40 #include "script-c.h"
  41
  42 namespace gold
  43 {
  44
  45 // A token read from a script file.  We don't implement keywords here;
  46 // all keywords are simply represented as a string.
  47
  48 class Token
  49 {
  50  public:
  51   // Token classification.
  52   enum Classification
  53   {
  54     // Token is invalid.
  55     TOKEN_INVALID,
  56     // Token indicates end of input.
  57     TOKEN_EOF,
  58     // Token is a string of characters.
  59     TOKEN_STRING,
  60     // Token is an operator.
  61     TOKEN_OPERATOR,
  62     // Token is a number (an integer).
  63     TOKEN_INTEGER
  64   };
  65
  66   // We need an empty constructor so that we can put this STL objects.
  67   Token()
  68     : classification_(TOKEN_INVALID), value_(), opcode_(0),
  69       lineno_(0), charpos_(0)
  70   { }
  71
  72   // A general token with no value.
  73   Token(Classification classification, int lineno, int charpos)
  74     : classification_(classification), value_(), opcode_(0),
  75       lineno_(lineno), charpos_(charpos)
  76   {
  77     gold_assert(classification == TOKEN_INVALID
  78                 || classification == TOKEN_EOF);
  79   }
  80
  81   // A general token with a value.
  82   Token(Classification classification, const std::string& value,
  83         int lineno, int charpos)
  84     : classification_(classification), value_(value), opcode_(0),
  85       lineno_(lineno), charpos_(charpos)
  86   {
  87     gold_assert(classification != TOKEN_INVALID
  88                 && classification != TOKEN_EOF);
  89   }
  90
  91   // A token representing a string of characters.
  92   Token(const std::string& s, int lineno, int charpos)
  93     : classification_(TOKEN_STRING), value_(s), opcode_(0),
  94       lineno_(lineno), charpos_(charpos)
  95   { }
  96
  97   // A token representing an operator.
  98   Token(int opcode, int lineno, int charpos)
  99     : classification_(TOKEN_OPERATOR), value_(), opcode_(opcode),
 100       lineno_(lineno), charpos_(charpos)
 101   { }
 102
 103   // Return whether the token is invalid.
 104   bool
 105   is_invalid() const
 106   { return this->classification_ == TOKEN_INVALID; }
 107
 108   // Return whether this is an EOF token.
 109   bool
 110   is_eof() const
 111   { return this->classification_ == TOKEN_EOF; }
 112
 113   // Return the token classification.
 114   Classification
 115   classification() const
 116   { return this->classification_; }
 117
 118   // Return the line number at which the token starts.
 119   int
 120   lineno() const
 121   { return this->lineno_; }
 122
 123   // Return the character position at this the token starts.
 124   int
 125   charpos() const
 126   { return this->charpos_; }
 127
 128   // Get the value of a token.
 129
 130   const std::string&
 131   string_value() const
 132   {
 133     gold_assert(this->classification_ == TOKEN_STRING);
 134     return this->value_;
 135   }
 136
 137   int
 138   operator_value() const
 139   {
 140     gold_assert(this->classification_ == TOKEN_OPERATOR);
 141     return this->opcode_;
 142   }
 143
 144   int64_t
 145   integer_value() const
 146   {
 147     gold_assert(this->classification_ == TOKEN_INTEGER);
 148     return strtoll(this->value_.c_str(), NULL, 0);
 149   }
 150
 151  private:
 152   // The token classification.
 153   Classification classification_;
 154   // The token value, for TOKEN_STRING or TOKEN_INTEGER.
 155   std::string value_;
 156   // The token value, for TOKEN_OPERATOR.
 157   int opcode_;
 158   // The line number where this token started (one based).
 159   int lineno_;
 160   // The character position within the line where this token started
 161   // (one based).
 162   int charpos_;
 163 };
 164
 165 // This class handles lexing a file into a sequence of tokens.  We
 166 // don't expect linker scripts to be large, so we just read them and
 167 // tokenize them all at once.
 168
 169 class Lex
 170 {
 171  public:
 172   Lex(Input_file* input_file)
 173     : input_file_(input_file), tokens_()
 174   { }
 175
 176   // Tokenize the file.  Return the final token, which will be either
 177   // an invalid token or an EOF token.  An invalid token indicates
 178   // that tokenization failed.
 179   Token
 180   tokenize();
 181
 182   // A token sequence.
 183   typedef std::vector<Token> Token_sequence;
 184
 185   // Return the tokens.
 186   const Token_sequence&
 187   tokens() const
 188   { return this->tokens_; }
 189
 190  private:
 191   Lex(const Lex&);
 192   Lex& operator=(const Lex&);
 193
 194   // Read the file into a string buffer.
 195   void
 196   read_file(std::string*);
 197
 198   // Make a general token with no value at the current location.
 199   Token
 200   make_token(Token::Classification c, const char* p) const
 201   { return Token(c, this->lineno_, p - this->linestart_ + 1); }
 202
 203   // Make a general token with a value at the current location.
 204   Token
 205   make_token(Token::Classification c, const std::string& v, const char* p)
 206     const
 207   { return Token(c, v, this->lineno_, p - this->linestart_ + 1); }
 208
 209   // Make an operator token at the current location.
 210   Token
 211   make_token(int opcode, const char* p) const
 212   { return Token(opcode, this->lineno_, p - this->linestart_ + 1); }
 213
 214   // Make an invalid token at the current location.
 215   Token
 216   make_invalid_token(const char* p)
 217   { return this->make_token(Token::TOKEN_INVALID, p); }
 218
 219   // Make an EOF token at the current location.
 220   Token
 221   make_eof_token(const char* p)
 222   { return this->make_token(Token::TOKEN_EOF, p); }
 223
 224   // Return whether C can be the first character in a name.  C2 is the
 225   // next character, since we sometimes need that.
 226   static inline bool
 227   can_start_name(char c, char c2);
 228
 229   // Return whether C can appear in a name which has already started.
 230   static inline bool
 231   can_continue_name(char c);
 232
 233   // Return whether C, C2, C3 can start a hex number.
 234   static inline bool
 235   can_start_hex(char c, char c2, char c3);
 236
 237   // Return whether C can appear in a hex number.
 238   static inline bool
 239   can_continue_hex(char c);
 240
 241   // Return whether C can start a non-hex number.
 242   static inline bool
 243   can_start_number(char c);
 244
 245   // Return whether C can appear in a non-hex number.
 246   static inline bool
 247   can_continue_number(char c)
 248   { return Lex::can_start_number(c); }
 249
 250   // If C1 C2 C3 form a valid three character operator, return the
 251   // opcode.  Otherwise return 0.
 252   static inline int
 253   three_char_operator(char c1, char c2, char c3);
 254
 255   // If C1 C2 form a valid two character operator, return the opcode.
 256   // Otherwise return 0.
 257   static inline int
 258   two_char_operator(char c1, char c2);
 259
 260   // If C1 is a valid one character operator, return the opcode.
 261   // Otherwise return 0.
 262   static inline int
 263   one_char_operator(char c1);
 264
 265   // Read the next token.
 266   Token
 267   get_token(const char**);
 268
 269   // Skip a C style /* */ comment.  Return false if the comment did
 270   // not end.
 271   bool
 272   skip_c_comment(const char**);
 273
 274   // Skip a line # comment.  Return false if there was no newline.
 275   bool
 276   skip_line_comment(const char**);
 277
 278   // Build a token CLASSIFICATION from all characters that match
 279   // CAN_CONTINUE_FN.  The token starts at START.  Start matching from
 280   // MATCH.  Set *PP to the character following the token.
 281   inline Token
 282   gather_token(Token::Classification, bool (*can_continue_fn)(char),
 283                const char* start, const char* match, const char** pp);
 284
 285   // Build a token from a quoted string.
 286   Token
 287   gather_quoted_string(const char** pp);
 288
 289   // The file we are reading.
 290   Input_file* input_file_;
 291   // The token sequence we create.
 292   Token_sequence tokens_;
 293   // The current line number.
 294   int lineno_;
 295   // The start of the current line in the buffer.
 296   const char* linestart_;
 297 };
 298
 299 // Read the whole file into memory.  We don't expect linker scripts to
 300 // be large, so we just use a std::string as a buffer.  We ignore the
 301 // data we've already read, so that we read aligned buffers.
 302
 303 void
 304 Lex::read_file(std::string* contents)
 305 {
 306   off_t filesize = this->input_file_->file().filesize();
 307   contents->clear();
 308   contents->reserve(filesize);
 309
 310   off_t off = 0;
 311   unsigned char buf[BUFSIZ];
 312   while (off < filesize)
 313     {
 314       off_t get = BUFSIZ;
 315       if (get > filesize - off)
 316         get = filesize - off;
 317       this->input_file_->file().read(off, get, buf);
 318       contents->append(reinterpret_cast<char*>(&buf[0]), get);
 319       off += get;
 320     }
 321 }
 322
 323 // Return whether C can be the start of a name, if the next character
 324 // is C2.  A name can being with a letter, underscore, period, or
 325 // dollar sign.  Because a name can be a file name, we also permit
 326 // forward slash, backslash, and tilde.  Tilde is the tricky case
 327 // here; GNU ld also uses it as a bitwise not operator.  It is only
 328 // recognized as the operator if it is not immediately followed by
 329 // some character which can appear in a symbol.  That is, "~0" is a
 330 // symbol name, and "~ 0" is an expression using bitwise not.  We are
 331 // compatible.
 332
 333 inline bool
 334 Lex::can_start_name(char c, char c2)
 335 {
 336   switch (c)
 337     {
 338     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 339     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 340     case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
 341     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 342     case 'Y': case 'Z':
 343     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 344     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 345     case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
 346     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 347     case 'y': case 'z':
 348     case '_': case '.': case '$': case '/': case '\\':
 349       return true;
 350
 351     case '~':
 352       return can_continue_name(c2);
 353
 354     default:
 355       return false;
 356     }
 357 }
 358
 359 // Return whether C can continue a name which has already started.
 360 // Subsequent characters in a name are the same as the leading
 361 // characters, plus digits and "=+-:[],?*".  So in general the linker
 362 // script language requires spaces around operators.
 363
 364 inline bool
 365 Lex::can_continue_name(char c)
 366 {
 367   switch (c)
 368     {
 369     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 370     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 371     case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
 372     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 373     case 'Y': case 'Z':
 374     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 375     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 376     case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
 377     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 378     case 'y': case 'z':
 379     case '_': case '.': case '$': case '/': case '\\':
 380     case '~':
 381     case '0': case '1': case '2': case '3': case '4':
 382     case '5': case '6': case '7': case '8': case '9':
 383     case '=': case '+': case '-': case ':': case '[': case ']':
 384     case ',': case '?': case '*':
 385       return true;
 386
 387     default:
 388       return false;
 389     }
 390 }
 391
 392 // For a number we accept 0x followed by hex digits, or any sequence
 393 // of digits.  The old linker accepts leading '$' for hex, and
 394 // trailing HXBOD.  Those are for MRI compatibility and we don't
 395 // accept them.  The old linker also accepts trailing MK for mega or
 396 // kilo.  Those are mentioned in the documentation, and we accept
 397 // them.
 398
 399 // Return whether C1 C2 C3 can start a hex number.
 400
 401 inline bool
 402 Lex::can_start_hex(char c1, char c2, char c3)
 403 {
 404   if (c1 == '0' && (c2 == 'x' || c2 == 'X'))
 405     return Lex::can_continue_hex(c3);
 406   return false;
 407 }
 408
 409 // Return whether C can appear in a hex number.
 410
 411 inline bool
 412 Lex::can_continue_hex(char c)
 413 {
 414   switch (c)
 415     {
 416     case '0': case '1': case '2': case '3': case '4':
 417     case '5': case '6': case '7': case '8': case '9':
 418     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 419     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 420       return true;
 421
 422     default:
 423       return false;
 424     }
 425 }
 426
 427 // Return whether C can start a non-hex number.
 428
 429 inline bool
 430 Lex::can_start_number(char c)
 431 {
 432   switch (c)
 433     {
 434     case '0': case '1': case '2': case '3': case '4':
 435     case '5': case '6': case '7': case '8': case '9':
 436       return true;
 437
 438     default:
 439       return false;
 440     }
 441 }
 442
 443 // If C1 C2 C3 form a valid three character operator, return the
 444 // opcode (defined in the yyscript.h file generated from yyscript.y).
 445 // Otherwise return 0.
 446
 447 inline int
 448 Lex::three_char_operator(char c1, char c2, char c3)
 449 {
 450   switch (c1)
 451     {
 452     case '<':
 453       if (c2 == '<' && c3 == '=')
 454         return LSHIFTEQ;
 455       break;
 456     case '>':
 457       if (c2 == '>' && c3 == '=')
 458         return RSHIFTEQ;
 459       break;
 460     default:
 461       break;
 462     }
 463   return 0;
 464 }
 465
 466 // If C1 C2 form a valid two character operator, return the opcode
 467 // (defined in the yyscript.h file generated from yyscript.y).
 468 // Otherwise return 0.
 469
 470 inline int
 471 Lex::two_char_operator(char c1, char c2)
 472 {
 473   switch (c1)
 474     {
 475     case '=':
 476       if (c2 == '=')
 477         return EQ;
 478       break;
 479     case '!':
 480       if (c2 == '=')
 481         return NE;
 482       break;
 483     case '+':
 484       if (c2 == '=')
 485         return PLUSEQ;
 486       break;
 487     case '-':
 488       if (c2 == '=')
 489         return MINUSEQ;
 490       break;
 491     case '*':
 492       if (c2 == '=')
 493         return MULTEQ;
 494       break;
 495     case '/':
 496       if (c2 == '=')
 497         return DIVEQ;
 498       break;
 499     case '|':
 500       if (c2 == '=')
 501         return OREQ;
 502       if (c2 == '|')
 503         return OROR;
 504       break;
 505     case '&':
 506       if (c2 == '=')
 507         return ANDEQ;
 508       if (c2 == '&')
 509         return ANDAND;
 510       break;
 511     case '>':
 512       if (c2 == '=')
 513         return GE;
 514       if (c2 == '>')
 515         return RSHIFT;
 516       break;
 517     case '<':
 518       if (c2 == '=')
 519         return LE;
 520       if (c2 == '<')
 521         return LSHIFT;
 522       break;
 523     default:
 524       break;
 525     }
 526   return 0;
 527 }
 528
 529 // If C1 is a valid operator, return the opcode.  Otherwise return 0.
 530
 531 inline int
 532 Lex::one_char_operator(char c1)
 533 {
 534   switch (c1)
 535     {
 536     case '+':
 537     case '-':
 538     case '*':
 539     case '/':
 540     case '%':
 541     case '!':
 542     case '&':
 543     case '|':
 544     case '^':
 545     case '~':
 546     case '<':
 547     case '>':
 548     case '=':
 549     case '?':
 550     case ',':
 551     case '(':
 552     case ')':
 553     case '{':
 554     case '}':
 555     case '[':
 556     case ']':
 557     case ':':
 558     case ';':
 559       return c1;
 560     default:
 561       return 0;
 562     }
 563 }
 564
 565 // Skip a C style comment.  *PP points to just after the "/*".  Return
 566 // false if the comment did not end.
 567
 568 bool
 569 Lex::skip_c_comment(const char** pp)
 570 {
 571   const char* p = *pp;
 572   while (p[0] != '*' || p[1] != '/')
 573     {
 574       if (*p == '\0')
 575         {
 576           *pp = p;
 577           return false;
 578         }
 579
 580       if (*p == '\n')
 581         {
 582           ++this->lineno_;
 583           this->linestart_ = p + 1;
 584         }
 585       ++p;
 586     }
 587
 588   *pp = p + 2;
 589   return true;
 590 }
 591
 592 // Skip a line # comment.  Return false if there was no newline.
 593
 594 bool
 595 Lex::skip_line_comment(const char** pp)
 596 {
 597   const char* p = *pp;
 598   size_t skip = strcspn(p, "\n");
 599   if (p[skip] == '\0')
 600     {
 601       *pp = p + skip;
 602       return false;
 603     }
 604
 605   p += skip + 1;
 606   ++this->lineno_;
 607   this->linestart_ = p;
 608   *pp = p;
 609
 610   return true;
 611 }
 612
 613 // Build a token CLASSIFICATION from all characters that match
 614 // CAN_CONTINUE_FN.  Update *PP.
 615
 616 inline Token
 617 Lex::gather_token(Token::Classification classification,
 618                   bool (*can_continue_fn)(char),
 619                   const char* start,
 620                   const char* match,
 621                   const char **pp)
 622 {
 623   while ((*can_continue_fn)(*match))
 624     ++match;
 625   *pp = match;
 626   return this->make_token(classification,
 627                           std::string(start, match - start),
 628                           start);
 629 }
 630
 631 // Build a token from a quoted string.
 632
 633 Token
 634 Lex::gather_quoted_string(const char** pp)
 635 {
 636   const char* start = *pp;
 637   const char* p = start;
 638   ++p;
 639   size_t skip = strcspn(p, "\"\n");
 640   if (p[skip] != '"')
 641     return this->make_invalid_token(start);
 642   *pp = p + skip + 1;
 643   return this->make_token(Token::TOKEN_STRING,
 644                           std::string(p, skip),
 645                           start);
 646 }
 647
 648 // Return the next token at *PP.  Update *PP.  General guideline: we
 649 // require linker scripts to be simple ASCII.  No unicode linker
 650 // scripts.  In particular we can assume that any '\0' is the end of
 651 // the input.
 652
 653 Token
 654 Lex::get_token(const char** pp)
 655 {
 656   const char* p = *pp;
 657
 658   while (true)
 659     {
 660       if (*p == '\0')
 661         {
 662           *pp = p;
 663           return this->make_eof_token(p);
 664         }
 665
 666       // Skip whitespace quickly.
 667       while (*p == ' ' || *p == '\t')
 668         ++p;
 669
 670       if (*p == '\n')
 671         {
 672           ++p;
 673           ++this->lineno_;
 674           this->linestart_ = p;
 675           continue;
 676         }
 677
 678       // Skip C style comments.
 679       if (p[0] == '/' && p[1] == '*')
 680         {
 681           int lineno = this->lineno_;
 682           int charpos = p - this->linestart_ + 1;
 683
 684           *pp = p + 2;
 685           if (!this->skip_c_comment(pp))
 686             return Token(Token::TOKEN_INVALID, lineno, charpos);
 687           p = *pp;
 688
 689           continue;
 690         }
 691
 692       // Skip line comments.
 693       if (*p == '#')
 694         {
 695           *pp = p + 1;
 696           if (!this->skip_line_comment(pp))
 697             return this->make_eof_token(p);
 698           p = *pp;
 699           continue;
 700         }
 701
 702       // Check for a name.
 703       if (Lex::can_start_name(p[0], p[1]))
 704         return this->gather_token(Token::TOKEN_STRING,
 705                                   Lex::can_continue_name,
 706                                   p, p + 2, pp);
 707
 708       // We accept any arbitrary name in double quotes, as long as it
 709       // does not cross a line boundary.
 710       if (*p == '"')
 711         {
 712           *pp = p;
 713           return this->gather_quoted_string(pp);
 714         }
 715
 716       // Check for a number.
 717
 718       if (Lex::can_start_hex(p[0], p[1], p[2]))
 719         return this->gather_token(Token::TOKEN_INTEGER,
 720                                   Lex::can_continue_hex,
 721                                   p, p + 3, pp);
 722
 723       if (Lex::can_start_number(p[0]))
 724         return this->gather_token(Token::TOKEN_INTEGER,
 725                                   Lex::can_continue_number,
 726                                   p, p + 1, pp);
 727
 728       // Check for operators.
 729
 730       int opcode = Lex::three_char_operator(p[0], p[1], p[2]);
 731       if (opcode != 0)
 732         {
 733           *pp = p + 3;
 734           return this->make_token(opcode, p);
 735         }
 736
 737       opcode = Lex::two_char_operator(p[0], p[1]);
 738       if (opcode != 0)
 739         {
 740           *pp = p + 2;
 741           return this->make_token(opcode, p);
 742         }
 743
 744       opcode = Lex::one_char_operator(p[0]);
 745       if (opcode != 0)
 746         {
 747           *pp = p + 1;
 748           return this->make_token(opcode, p);
 749         }
 750
 751       return this->make_token(Token::TOKEN_INVALID, p);
 752     }
 753 }
 754
 755 // Tokenize the file.  Return the final token.
 756
 757 Token
 758 Lex::tokenize()
 759 {
 760   std::string contents;
 761   this->read_file(&contents);
 762
 763   const char* p = contents.c_str();
 764
 765   this->lineno_ = 1;
 766   this->linestart_ = p;
 767
 768   while (true)
 769     {
 770       Token t(this->get_token(&p));
 771
 772       // Don't let an early null byte fool us into thinking that we've
 773       // reached the end of the file.
 774       if (t.is_eof()
 775           && static_cast<size_t>(p - contents.c_str()) < contents.length())
 776         t = this->make_invalid_token(p);
 777
 778       if (t.is_invalid() || t.is_eof())
 779         return t;
 780
 781       this->tokens_.push_back(t);
 782     }
 783 }
 784
 785 // A trivial task which waits for THIS_BLOCKER to be clear and then
 786 // clears NEXT_BLOCKER.  THIS_BLOCKER may be NULL.
 787
 788 class Script_unblock : public Task
 789 {
 790  public:
 791   Script_unblock(Task_token* this_blocker, Task_token* next_blocker)
 792     : this_blocker_(this_blocker), next_blocker_(next_blocker)
 793   { }
 794
 795   ~Script_unblock()
 796   {
 797     if (this->this_blocker_ != NULL)
 798       delete this->this_blocker_;
 799   }
 800
 801   Task_token*
 802   is_runnable()
 803   {
 804     if (this->this_blocker_ != NULL && this->this_blocker_->is_blocked())
 805       return this->this_blocker_;
 806     return NULL;
 807   }
 808
 809   void
 810   locks(Task_locker* tl)
 811   { tl->add(this, this->next_blocker_); }
 812
 813   void
 814   run(Workqueue*)
 815   { }
 816
 817   std::string
 818   get_name() const
 819   { return "Script_unblock"; }
 820
 821  private:
 822   Task_token* this_blocker_;
 823   Task_token* next_blocker_;
 824 };
 825
 826 // This class holds data passed through the parser to the lexer and to
 827 // the parser support functions.  This avoids global variables.  We
 828 // can't use global variables because we need not be called by a
 829 // singleton thread.
 830
 831 class Parser_closure
 832 {
 833  public:
 834   Parser_closure(const char* filename,
 835                  const Position_dependent_options& posdep_options,
 836                  bool in_group, bool is_in_sysroot,
 837                  Command_line* command_line,
 838                  Layout* layout,
 839                  const Lex::Token_sequence* tokens)
 840     : filename_(filename), posdep_options_(posdep_options),
 841       in_group_(in_group), is_in_sysroot_(is_in_sysroot),
 842       command_line_(command_line), layout_(layout), tokens_(tokens),
 843       next_token_index_(0), inputs_(NULL)
 844   { }
 845
 846   // Return the file name.
 847   const char*
 848   filename() const
 849   { return this->filename_; }
 850
 851   // Return the position dependent options.  The caller may modify
 852   // this.
 853   Position_dependent_options&
 854   position_dependent_options()
 855   { return this->posdep_options_; }
 856
 857   // Return whether this script is being run in a group.
 858   bool
 859   in_group() const
 860   { return this->in_group_; }
 861
 862   // Return whether this script was found using a directory in the
 863   // sysroot.
 864   bool
 865   is_in_sysroot() const
 866   { return this->is_in_sysroot_; }
 867
 868   // Returns the Command_line structure passed in at constructor time.
 869   // This value may be NULL.  The caller may modify this, which modifies
 870   // the passed-in Command_line object (not a copy).
 871   Command_line* command_line()
 872   { return this->command_line_; }
 873
 874   // Return the Layout structure passed in at constructor time.  This
 875   // value may be NULL.
 876   Layout* layout()
 877   { return this->layout_; }
 878
 879   // Whether we are at the end of the token list.
 880   bool
 881   at_eof() const
 882   { return this->next_token_index_ >= this->tokens_->size(); }
 883
 884   // Return the next token, and advance.
 885   const Token*
 886   next_token()
 887   {
 888     const Token* ret = &(*this->tokens_)[this->next_token_index_];
 889     ++this->next_token_index_;
 890     return ret;
 891   }
 892
 893   // Return the previous token.
 894   const Token*
 895   last_token() const
 896   {
 897     gold_assert(this->next_token_index_ > 0);
 898     return &(*this->tokens_)[this->next_token_index_ - 1];
 899   }
 900
 901   // Return the list of input files, creating it if necessary.  This
 902   // is a space leak--we never free the INPUTS_ pointer.
 903   Input_arguments*
 904   inputs()
 905   {
 906     if (this->inputs_ == NULL)
 907       this->inputs_ = new Input_arguments();
 908     return this->inputs_;
 909   }
 910
 911   // Return whether we saw any input files.
 912   bool
 913   saw_inputs() const
 914   { return this->inputs_ != NULL && !this->inputs_->empty(); }
 915
 916  private:
 917   // The name of the file we are reading.
 918   const char* filename_;
 919   // The position dependent options.
 920   Position_dependent_options posdep_options_;
 921   // Whether we are currently in a --start-group/--end-group.
 922   bool in_group_;
 923   // Whether the script was found in a sysrooted directory.
 924   bool is_in_sysroot_;
 925   // May be NULL if the user chooses not to pass one in.
 926   Command_line* command_line_;
 927   // May be NULL if the user chooses not to pass one in.
 928   Layout* layout_;
 929
 930   // The tokens to be returned by the lexer.
 931   const Lex::Token_sequence* tokens_;
 932   // The index of the next token to return.
 933   unsigned int next_token_index_;
 934   // New input files found to add to the link.
 935   Input_arguments* inputs_;
 936 };
 937
 938 // FILE was found as an argument on the command line.  Try to read it
 939 // as a script.  We've already read BYTES of data into P, but we
 940 // ignore that.  Return true if the file was handled.
 941
 942 bool
 943 read_input_script(Workqueue* workqueue, const General_options& options,
 944                   Symbol_table* symtab, Layout* layout,
 945                   Dirsearch* dirsearch, Input_objects* input_objects,
 946                   Input_group* input_group,
 947                   const Input_argument* input_argument,
 948                   Input_file* input_file, const unsigned char*, off_t,
 949                   Task_token* this_blocker, Task_token* next_blocker)
 950 {
 951   Lex lex(input_file);
 952   if (lex.tokenize().is_invalid())
 953     return false;
 954
 955   Parser_closure closure(input_file->filename().c_str(),
 956                          input_argument->file().options(),
 957                          input_group != NULL,
 958                          input_file->is_in_sysroot(),
 959                          NULL,
 960                          layout,
 961                          &lex.tokens());
 962
 963   if (yyparse(&closure) != 0)
 964     return false;
 965
 966   // THIS_BLOCKER must be clear before we may add anything to the
 967   // symbol table.  We are responsible for unblocking NEXT_BLOCKER
 968   // when we are done.  We are responsible for deleting THIS_BLOCKER
 969   // when it is unblocked.
 970
 971   if (!closure.saw_inputs())
 972     {
 973       // The script did not add any files to read.  Note that we are
 974       // not permitted to call NEXT_BLOCKER->unblock() here even if
 975       // THIS_BLOCKER is NULL, as we do not hold the workqueue lock.
 976       workqueue->queue(new Script_unblock(this_blocker, next_blocker));
 977       return true;
 978     }
 979
 980   for (Input_arguments::const_iterator p = closure.inputs()->begin();
 981        p != closure.inputs()->end();
 982        ++p)
 983     {
 984       Task_token* nb;
 985       if (p + 1 == closure.inputs()->end())
 986         nb = next_blocker;
 987       else
 988         {
 989           nb = new Task_token(true);
 990           nb->add_blocker();
 991         }
 992       workqueue->queue(new Read_symbols(options, input_objects, symtab,
 993                                         layout, dirsearch, &*p,
 994                                         input_group, this_blocker, nb));
 995       this_blocker = nb;
 996     }
 997
 998   return true;
 999 }
1000
1001 // FILENAME was found as an argument to --script (-T).
1002 // Read it as a script, and execute its contents immediately.
1003
1004 bool
1005 read_commandline_script(const char* filename, Command_line* cmdline)
1006 {
1007   // TODO: if filename is a relative filename, search for it manually
1008   // using "." + cmdline->options()->search_path() -- not dirsearch.
1009   Dirsearch dirsearch;
1010
1011   // The file locking code wants to record a Task, but we haven't
1012   // started the workqueue yet.  This is only for debugging purposes,
1013   // so we invent a fake value.
1014   const Task* task = reinterpret_cast<const Task*>(-1);
1015
1016   Input_file_argument input_argument(filename, false, "",
1017                                      cmdline->position_dependent_options());
1018   Input_file input_file(&input_argument);
1019   if (!input_file.open(cmdline->options(), dirsearch, task))
1020     return false;
1021
1022   Lex lex(&input_file);
1023   if (lex.tokenize().is_invalid())
1024     {
1025       // Opening the file locked it, so now we need to unlock it.
1026       input_file.file().unlock(task);
1027       return false;
1028     }
1029
1030   Parser_closure closure(filename,
1031                          cmdline->position_dependent_options(),
1032                          false,
1033                          input_file.is_in_sysroot(),
1034                          cmdline,
1035                          NULL,
1036                          &lex.tokens());
1037   if (yyparse(&closure) != 0)
1038     {
1039       input_file.file().unlock(task);
1040       return false;
1041     }
1042
1043   input_file.file().unlock(task);
1044
1045   gold_assert(!closure.saw_inputs());
1046
1047   return true;
1048 }
1049
// Manage the mapping from keywords to the codes expected by the bison
// parser.  The class has only static members: a table of
// keyword/parsecode pairs and a lookup function over that table.

class Keyword_to_parsecode
{
 public:
  // The structure which maps keywords to parsecodes.  One table entry.
  struct Keyword_parsecode
  {
    // Keyword.
    const char* keyword;
    // Corresponding parsecode.
    int parsecode;
  };

  // Return the parsecode corresponding to KEYWORD, or 0 if it is not
  // a keyword.
  static int
  keyword_to_parsecode(const char* keyword);

 private:
  // The array of all keywords.
  static const Keyword_parsecode keyword_parsecodes_[];

  // The number of keywords, i.e., the number of entries in
  // keyword_parsecodes_.
  static const int keyword_count;
};
1077
// Mapping from keyword string to keyword parsecode.  This array must
// be kept in sorted order, because parsecodes are looked up using
// bsearch with a strcmp comparison.  That means the order is the
// strcmp order of the keyword strings: upper case keywords come
// before lower case ones, and "ASSERT" comes before "AS_NEEDED".
// This array must correspond to the list of parsecodes in yyscript.y.
// Several spellings may map to the same parsecode (e.g., "LENGTH",
// "l" and "len").

const Keyword_to_parsecode::Keyword_parsecode
Keyword_to_parsecode::keyword_parsecodes_[] =
{
  { "ABSOLUTE", ABSOLUTE },
  { "ADDR", ADDR },
  { "ALIGN", ALIGN_K },
  { "ASSERT", ASSERT_K },
  { "AS_NEEDED", AS_NEEDED },
  { "AT", AT },
  { "BIND", BIND },
  { "BLOCK", BLOCK },
  { "BYTE", BYTE },
  { "CONSTANT", CONSTANT },
  { "CONSTRUCTORS", CONSTRUCTORS },
  { "COPY", COPY },
  { "CREATE_OBJECT_SYMBOLS", CREATE_OBJECT_SYMBOLS },
  { "DATA_SEGMENT_ALIGN", DATA_SEGMENT_ALIGN },
  { "DATA_SEGMENT_END", DATA_SEGMENT_END },
  { "DATA_SEGMENT_RELRO_END", DATA_SEGMENT_RELRO_END },
  { "DEFINED", DEFINED },
  { "DSECT", DSECT },
  { "ENTRY", ENTRY },
  { "EXCLUDE_FILE", EXCLUDE_FILE },
  { "EXTERN", EXTERN },
  { "FILL", FILL },
  { "FLOAT", FLOAT },
  { "FORCE_COMMON_ALLOCATION", FORCE_COMMON_ALLOCATION },
  { "GROUP", GROUP },
  { "HLL", HLL },
  { "INCLUDE", INCLUDE },
  { "INFO", INFO },
  { "INHIBIT_COMMON_ALLOCATION", INHIBIT_COMMON_ALLOCATION },
  { "INPUT", INPUT },
  { "KEEP", KEEP },
  { "LENGTH", LENGTH },
  { "LOADADDR", LOADADDR },
  { "LONG", LONG },
  { "MAP", MAP },
  { "MAX", MAX_K },
  { "MEMORY", MEMORY },
  { "MIN", MIN_K },
  { "NEXT", NEXT },
  { "NOCROSSREFS", NOCROSSREFS },
  { "NOFLOAT", NOFLOAT },
  { "NOLOAD", NOLOAD },
  { "ONLY_IF_RO", ONLY_IF_RO },
  { "ONLY_IF_RW", ONLY_IF_RW },
  { "OPTION", OPTION },
  { "ORIGIN", ORIGIN },
  { "OUTPUT", OUTPUT },
  { "OUTPUT_ARCH", OUTPUT_ARCH },
  { "OUTPUT_FORMAT", OUTPUT_FORMAT },
  { "OVERLAY", OVERLAY },
  { "PHDRS", PHDRS },
  { "PROVIDE", PROVIDE },
  { "PROVIDE_HIDDEN", PROVIDE_HIDDEN },
  { "QUAD", QUAD },
  { "SEARCH_DIR", SEARCH_DIR },
  { "SECTIONS", SECTIONS },
  { "SEGMENT_START", SEGMENT_START },
  { "SHORT", SHORT },
  { "SIZEOF", SIZEOF },
  { "SIZEOF_HEADERS", SIZEOF_HEADERS },
  { "SORT_BY_ALIGNMENT", SORT_BY_ALIGNMENT },
  { "SORT_BY_NAME", SORT_BY_NAME },
  { "SPECIAL", SPECIAL },
  { "SQUAD", SQUAD },
  { "STARTUP", STARTUP },
  { "SUBALIGN", SUBALIGN },
  { "SYSLIB", SYSLIB },
  { "TARGET", TARGET_K },
  { "TRUNCATE", TRUNCATE },
  { "VERSION", VERSIONK },
  { "global", GLOBAL },
  { "l", LENGTH },
  { "len", LENGTH },
  { "local", LOCAL },
  { "o", ORIGIN },
  { "org", ORIGIN },
  { "sizeof_headers", SIZEOF_HEADERS },
};
1163
1164 const int Keyword_to_parsecode::keyword_count =
1165   (sizeof(Keyword_to_parsecode::keyword_parsecodes_)
1166    / sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]));
1167
1168 // Comparison function passed to bsearch.
1169
1170 extern "C"
1171 {
1172
1173 static int
1174 ktt_compare(const void* keyv, const void* kttv)
1175 {
1176   const char* key = static_cast<const char*>(keyv);
1177   const Keyword_to_parsecode::Keyword_parsecode* ktt =
1178     static_cast<const Keyword_to_parsecode::Keyword_parsecode*>(kttv);
1179   return strcmp(key, ktt->keyword);
1180 }
1181
1182 } // End extern "C".
1183
1184 int
1185 Keyword_to_parsecode::keyword_to_parsecode(const char* keyword)
1186 {
1187   void* kttv = bsearch(keyword,
1188                        Keyword_to_parsecode::keyword_parsecodes_,
1189                        Keyword_to_parsecode::keyword_count,
1190                        sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]),
1191                        ktt_compare);
1192   if (kttv == NULL)
1193     return 0;
1194   Keyword_parsecode* ktt = static_cast<Keyword_parsecode*>(kttv);
1195   return ktt->parsecode;
1196 }
1197
1198 } // End namespace gold.
1199
1200 // The remaining functions are extern "C", so it's clearer to not put
1201 // them in namespace gold.
1202
1203 using namespace gold;
1204
1205 // This function is called by the bison parser to return the next
1206 // token.
1207
1208 extern "C" int
1209 yylex(YYSTYPE* lvalp, void* closurev)
1210 {
1211   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1212
1213   if (closure->at_eof())
1214     return 0;
1215
1216   const Token* token = closure->next_token();
1217
1218   switch (token->classification())
1219     {
1220     default:
1221     case Token::TOKEN_INVALID:
1222     case Token::TOKEN_EOF:
1223       gold_unreachable();
1224
1225     case Token::TOKEN_STRING:
1226       {
1227         const char* str = token->string_value().c_str();
1228         int parsecode = Keyword_to_parsecode::keyword_to_parsecode(str);
1229         if (parsecode != 0)
1230           return parsecode;
1231         lvalp->string = str;
1232         return STRING;
1233       }
1234
1235     case Token::TOKEN_OPERATOR:
1236       return token->operator_value();
1237
1238     case Token::TOKEN_INTEGER:
1239       lvalp->integer = token->integer_value();
1240       return INTEGER;
1241     }
1242 }
1243
1244 // This function is called by the bison parser to report an error.
1245
1246 extern "C" void
1247 yyerror(void* closurev, const char* message)
1248 {
1249   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1250
1251   const Token* token = closure->last_token();
1252   gold_error(_("%s:%d:%d: %s"), closure->filename(), token->lineno(),
1253              token->charpos(), message);
1254 }
1255
1256 // Called by the bison parser to add a file to the link.
1257
1258 extern "C" void
1259 script_add_file(void* closurev, const char* name)
1260 {
1261   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1262
1263   // If this is an absolute path, and we found the script in the
1264   // sysroot, then we want to prepend the sysroot to the file name.
1265   // For example, this is how we handle a cross link to the x86_64
1266   // libc.so, which refers to /lib/libc.so.6.
1267   std::string name_string;
1268   const char* extra_search_path = ".";
1269   std::string script_directory;
1270   if (IS_ABSOLUTE_PATH (name))
1271     {
1272       if (closure->is_in_sysroot())
1273         {
1274           const std::string& sysroot(parameters->sysroot());
1275           gold_assert(!sysroot.empty());
1276           name_string = sysroot + name;
1277           name = name_string.c_str();
1278         }
1279     }
1280   else
1281     {
1282       // In addition to checking the normal library search path, we
1283       // also want to check in the script-directory.
1284       const char *slash = strrchr(closure->filename(), '/');
1285       if (slash != NULL)
1286         {
1287           script_directory.assign(closure->filename(),
1288                                   slash - closure->filename() + 1);
1289           extra_search_path = script_directory.c_str();
1290         }
1291     }
1292
1293   Input_file_argument file(name, false, extra_search_path,
1294                            closure->position_dependent_options());
1295   closure->inputs()->add_file(file);
1296 }
1297
1298 // Called by the bison parser to start a group.  If we are already in
1299 // a group, that means that this script was invoked within a
1300 // --start-group --end-group sequence on the command line, or that
1301 // this script was found in a GROUP of another script.  In that case,
1302 // we simply continue the existing group, rather than starting a new
1303 // one.  It is possible to construct a case in which this will do
1304 // something other than what would happen if we did a recursive group,
1305 // but it's hard to imagine why the different behaviour would be
1306 // useful for a real program.  Avoiding recursive groups is simpler
1307 // and more efficient.
1308
1309 extern "C" void
1310 script_start_group(void* closurev)
1311 {
1312   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1313   if (!closure->in_group())
1314     closure->inputs()->start_group();
1315 }
1316
1317 // Called by the bison parser at the end of a group.
1318
1319 extern "C" void
1320 script_end_group(void* closurev)
1321 {
1322   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1323   if (!closure->in_group())
1324     closure->inputs()->end_group();
1325 }
1326
1327 // Called by the bison parser to start an AS_NEEDED list.
1328
1329 extern "C" void
1330 script_start_as_needed(void* closurev)
1331 {
1332   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1333   closure->position_dependent_options().set_as_needed();
1334 }
1335
1336 // Called by the bison parser at the end of an AS_NEEDED list.
1337
1338 extern "C" void
1339 script_end_as_needed(void* closurev)
1340 {
1341   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1342   closure->position_dependent_options().clear_as_needed();
1343 }
1344
1345 // Called by the bison parser to set the entry symbol.
1346
1347 extern "C" void
1348 script_set_entry(void* closurev, const char* entry)
1349 {
1350   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1351   if (closure->command_line() != NULL)
1352     closure->command_line()->set_entry(entry);
1353   else
1354     closure->layout()->set_entry(entry);
1355 }
1356
1357 // Called by the bison parser to parse an OPTION.
1358
1359 extern "C" void
1360 script_parse_option(void* closurev, const char* option)
1361 {
1362   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1363   // We treat the option as a single command-line option, even if
1364   // it has internal whitespace.
1365   if (closure->command_line() == NULL)
1366     {
1367       // There are some options that we could handle here--e.g.,
1368       // -lLIBRARY.  Should we bother?
1369       gold_warning(_("%s: ignoring command OPTION; OPTION is only valid"
1370                      " for scripts specified via -T/--script"),
1371                    closure->filename());
1372     }
1373   else
1374     {
1375       bool past_a_double_dash_option = false;
1376       char* mutable_option = strdup(option);
1377       closure->command_line()->process_one_option(1, &mutable_option, 0,
1378                                                   &past_a_double_dash_option);
1379       free(mutable_option);
1380     }
1381 }