gold/script.cc

   1 // script.cc -- handle linker scripts for gold.
   2
   3 #include "gold.h"
   4
   5 #include <string>
   6 #include <vector>
   7 #include <cassert>
   8 #include <cstdio>
   9 #include <cstdlib>
  10
  11 #include "options.h"
  12 #include "fileread.h"
  13 #include "workqueue.h"
  14 #include "readsyms.h"
  15 #include "yyscript.h"
  16 #include "script.h"
  17 #include "script-c.h"
  18
  19 namespace gold
  20 {
  21
  22 // A token read from a script file.  We don't implement keywords here;
  23 // all keywords are simply represented as a string.
  24
  25 class Token
  26 {
  27  public:
  28   // Token classification.
  29   enum Classification
  30   {
  31     // Token is invalid.
  32     TOKEN_INVALID,
  33     // Token indicates end of input.
  34     TOKEN_EOF,
  35     // Token is a string of characters.
  36     TOKEN_STRING,
  37     // Token is an operator.
  38     TOKEN_OPERATOR,
  39     // Token is a number (an integer).
  40     TOKEN_INTEGER
  41   };
  42
  43   // We need an empty constructor so that we can put this STL objects.
  44   Token()
  45     : classification_(TOKEN_INVALID), value_(), opcode_(0),
  46       lineno_(0), charpos_(0)
  47   { }
  48
  49   // A general token with no value.
  50   Token(Classification classification, int lineno, int charpos)
  51     : classification_(classification), value_(), opcode_(0),
  52       lineno_(lineno), charpos_(charpos)
  53   { assert(classification == TOKEN_INVALID || classification == TOKEN_EOF); }
  54
  55   // A general token with a value.
  56   Token(Classification classification, const std::string& value,
  57         int lineno, int charpos)
  58     : classification_(classification), value_(value), opcode_(0),
  59       lineno_(lineno), charpos_(charpos)
  60   { assert(classification != TOKEN_INVALID && classification != TOKEN_EOF); }
  61
  62   // A token representing a string of characters.
  63   Token(const std::string& s, int lineno, int charpos)
  64     : classification_(TOKEN_STRING), value_(s), opcode_(0),
  65       lineno_(lineno), charpos_(charpos)
  66   { }
  67
  68   // A token representing an operator.
  69   Token(int opcode, int lineno, int charpos)
  70     : classification_(TOKEN_OPERATOR), value_(), opcode_(opcode),
  71       lineno_(lineno), charpos_(charpos)
  72   { }
  73
  74   // Return whether the token is invalid.
  75   bool
  76   is_invalid() const
  77   { return this->classification_ == TOKEN_INVALID; }
  78
  79   // Return whether this is an EOF token.
  80   bool
  81   is_eof() const
  82   { return this->classification_ == TOKEN_EOF; }
  83
  84   // Return the token classification.
  85   Classification
  86   classification() const
  87   { return this->classification_; }
  88
  89   // Return the line number at which the token starts.
  90   int
  91   lineno() const
  92   { return this->lineno_; }
  93
  94   // Return the character position at this the token starts.
  95   int
  96   charpos() const
  97   { return this->charpos_; }
  98
  99   // Get the value of a token.
 100
 101   const std::string&
 102   string_value() const
 103   {
 104     assert(this->classification_ == TOKEN_STRING);
 105     return this->value_;
 106   }
 107
 108   int
 109   operator_value() const
 110   {
 111     assert(this->classification_ == TOKEN_OPERATOR);
 112     return this->opcode_;
 113   }
 114
 115   int64_t
 116   integer_value() const
 117   {
 118     assert(this->classification_ == TOKEN_INTEGER);
 119     return strtoll(this->value_.c_str(), NULL, 0);
 120   }
 121
 122  private:
 123   // The token classification.
 124   Classification classification_;
 125   // The token value, for TOKEN_STRING or TOKEN_INTEGER.
 126   std::string value_;
 127   // The token value, for TOKEN_OPERATOR.
 128   int opcode_;
 129   // The line number where this token started (one based).
 130   int lineno_;
 131   // The character position within the line where this token started
 132   // (one based).
 133   int charpos_;
 134 };
 135
 136 // This class handles lexing a file into a sequence of tokens.  We
 137 // don't expect linker scripts to be large, so we just read them and
 138 // tokenize them all at once.
 139
 140 class Lex
 141 {
 142  public:
 143   Lex(Input_file* input_file)
 144     : input_file_(input_file), tokens_()
 145   { }
 146
 147   // Tokenize the file.  Return the final token, which will be either
 148   // an invalid token or an EOF token.  An invalid token indicates
 149   // that tokenization failed.
 150   Token
 151   tokenize();
 152
 153   // A token sequence.
 154   typedef std::vector<Token> Token_sequence;
 155
 156   // Return the tokens.
 157   const Token_sequence&
 158   tokens() const
 159   { return this->tokens_; }
 160
 161  private:
 162   Lex(const Lex&);
 163   Lex& operator=(const Lex&);
 164
 165   // Read the file into a string buffer.
 166   void
 167   read_file(std::string*);
 168
 169   // Make a general token with no value at the current location.
 170   Token
 171   make_token(Token::Classification c, const char* p) const
 172   { return Token(c, this->lineno_, p - this->linestart_ + 1); }
 173
 174   // Make a general token with a value at the current location.
 175   Token
 176   make_token(Token::Classification c, const std::string& v, const char* p)
 177     const
 178   { return Token(c, v, this->lineno_, p - this->linestart_ + 1); }
 179
 180   // Make an operator token at the current location.
 181   Token
 182   make_token(int opcode, const char* p) const
 183   { return Token(opcode, this->lineno_, p - this->linestart_ + 1); }
 184
 185   // Make an invalid token at the current location.
 186   Token
 187   make_invalid_token(const char* p)
 188   { return this->make_token(Token::TOKEN_INVALID, p); }
 189
 190   // Make an EOF token at the current location.
 191   Token
 192   make_eof_token(const char* p)
 193   { return this->make_token(Token::TOKEN_EOF, p); }
 194
 195   // Return whether C can be the first character in a name.  C2 is the
 196   // next character, since we sometimes need that.
 197   static inline bool
 198   can_start_name(char c, char c2);
 199
 200   // Return whether C can appear in a name which has already started.
 201   static inline bool
 202   can_continue_name(char c);
 203
 204   // Return whether C, C2, C3 can start a hex number.
 205   static inline bool
 206   can_start_hex(char c, char c2, char c3);
 207
 208   // Return whether C can appear in a hex number.
 209   static inline bool
 210   can_continue_hex(char c);
 211
 212   // Return whether C can start a non-hex number.
 213   static inline bool
 214   can_start_number(char c);
 215
 216   // Return whether C can appear in a non-hex number.
 217   static inline bool
 218   can_continue_number(char c)
 219   { return Lex::can_start_number(c); }
 220
 221   // If C1 C2 C3 form a valid three character operator, return the
 222   // opcode.  Otherwise return 0.
 223   static inline int
 224   three_char_operator(char c1, char c2, char c3);
 225
 226   // If C1 C2 form a valid two character operator, return the opcode.
 227   // Otherwise return 0.
 228   static inline int
 229   two_char_operator(char c1, char c2);
 230
 231   // If C1 is a valid one character operator, return the opcode.
 232   // Otherwise return 0.
 233   static inline int
 234   one_char_operator(char c1);
 235
 236   // Read the next token.
 237   Token
 238   get_token(const char**);
 239
 240   // Skip a C style /* */ comment.  Return false if the comment did
 241   // not end.
 242   bool
 243   skip_c_comment(const char**);
 244
 245   // Skip a line # comment.  Return false if there was no newline.
 246   bool
 247   skip_line_comment(const char**);
 248
 249   // Build a token CLASSIFICATION from all characters that match
 250   // CAN_CONTINUE_FN.  The token starts at START.  Start matching from
 251   // MATCH.  Set *PP to the character following the token.
 252   inline Token
 253   gather_token(Token::Classification, bool (*can_continue_fn)(char),
 254                const char* start, const char* match, const char** pp);
 255
 256   // Build a token from a quoted string.
 257   Token
 258   gather_quoted_string(const char** pp);
 259
 260   // The file we are reading.
 261   Input_file* input_file_;
 262   // The token sequence we create.
 263   Token_sequence tokens_;
 264   // The current line number.
 265   int lineno_;
 266   // The start of the current line in the buffer.
 267   const char* linestart_;
 268 };
 269
 270 // Read the whole file into memory.  We don't expect linker scripts to
 271 // be large, so we just use a std::string as a buffer.  We ignore the
 272 // data we've already read, so that we read aligned buffers.
 273
 274 void
 275 Lex::read_file(std::string* contents)
 276 {
 277   contents->clear();
 278   off_t off = 0;
 279   off_t got;
 280   unsigned char buf[BUFSIZ];
 281   do
 282     {
 283       this->input_file_->file().read(off, sizeof buf, buf, &got);
 284       contents->append(reinterpret_cast<char*>(&buf[0]), got);
 285     }
 286   while (got == sizeof buf);
 287 }
 288
 289 // Return whether C can be the start of a name, if the next character
 290 // is C2.  A name can being with a letter, underscore, period, or
 291 // dollar sign.  Because a name can be a file name, we also permit
 292 // forward slash, backslash, and tilde.  Tilde is the tricky case
 293 // here; GNU ld also uses it as a bitwise not operator.  It is only
 294 // recognized as the operator if it is not immediately followed by
 295 // some character which can appear in a symbol.  That is, "~0" is a
 296 // symbol name, and "~ 0" is an expression using bitwise not.  We are
 297 // compatible.
 298
 299 inline bool
 300 Lex::can_start_name(char c, char c2)
 301 {
 302   switch (c)
 303     {
 304     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 305     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 306     case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
 307     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 308     case 'Y': case 'Z':
 309     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 310     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 311     case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
 312     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 313     case 'y': case 'z':
 314     case '_': case '.': case '$': case '/': case '\\':
 315       return true;
 316
 317     case '~':
 318       return can_continue_name(c2);
 319
 320     default:
 321       return false;
 322     }
 323 }
 324
 325 // Return whether C can continue a name which has already started.
 326 // Subsequent characters in a name are the same as the leading
 327 // characters, plus digits and "=+-:[],?*".  So in general the linker
 328 // script language requires spaces around operators.
 329
 330 inline bool
 331 Lex::can_continue_name(char c)
 332 {
 333   switch (c)
 334     {
 335     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 336     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 337     case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
 338     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 339     case 'Y': case 'Z':
 340     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 341     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 342     case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
 343     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 344     case 'y': case 'z':
 345     case '_': case '.': case '$': case '/': case '\\':
 346     case '~':
 347     case '0': case '1': case '2': case '3': case '4':
 348     case '5': case '6': case '7': case '8': case '9':
 349     case '=': case '+': case '-': case ':': case '[': case ']':
 350     case ',': case '?': case '*':
 351       return true;
 352
 353     default:
 354       return false;
 355     }
 356 }
 357
// For a number we accept 0x followed by hex digits, or any sequence
// of digits.  The old linker accepts a leading '$' for hex, and
// trailing H, X, B, O, or D.  Those are for MRI compatibility and we
// don't accept them.  The old linker also accepts a trailing M or K
// for mega or kilo.  Those are mentioned in the documentation, but the
// functions below do not recognize them: a number token consists only
// of the optional 0x prefix and digits.
 364
 365 // Return whether C1 C2 C3 can start a hex number.
 366
 367 inline bool
 368 Lex::can_start_hex(char c1, char c2, char c3)
 369 {
 370   if (c1 == '0' && (c2 == 'x' || c2 == 'X'))
 371     return Lex::can_continue_hex(c3);
 372   return false;
 373 }
 374
 375 // Return whether C can appear in a hex number.
 376
 377 inline bool
 378 Lex::can_continue_hex(char c)
 379 {
 380   switch (c)
 381     {
 382     case '0': case '1': case '2': case '3': case '4':
 383     case '5': case '6': case '7': case '8': case '9':
 384     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 385     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 386       return true;
 387
 388     default:
 389       return false;
 390     }
 391 }
 392
 393 // Return whether C can start a non-hex number.
 394
 395 inline bool
 396 Lex::can_start_number(char c)
 397 {
 398   switch (c)
 399     {
 400     case '0': case '1': case '2': case '3': case '4':
 401     case '5': case '6': case '7': case '8': case '9':
 402       return true;
 403
 404     default:
 405       return false;
 406     }
 407 }
 408
 409 // If C1 C2 C3 form a valid three character operator, return the
 410 // opcode (defined in the yyscript.h file generated from yyscript.y).
 411 // Otherwise return 0.
 412
 413 inline int
 414 Lex::three_char_operator(char c1, char c2, char c3)
 415 {
 416   switch (c1)
 417     {
 418     case '<':
 419       if (c2 == '<' && c3 == '=')
 420         return LSHIFTEQ;
 421       break;
 422     case '>':
 423       if (c2 == '>' && c3 == '=')
 424         return RSHIFTEQ;
 425       break;
 426     default:
 427       break;
 428     }
 429   return 0;
 430 }
 431
 432 // If C1 C2 form a valid two character operator, return the opcode
 433 // (defined in the yyscript.h file generated from yyscript.y).
 434 // Otherwise return 0.
 435
 436 inline int
 437 Lex::two_char_operator(char c1, char c2)
 438 {
 439   switch (c1)
 440     {
 441     case '=':
 442       if (c2 == '=')
 443         return EQ;
 444       break;
 445     case '!':
 446       if (c2 == '=')
 447         return NE;
 448       break;
 449     case '+':
 450       if (c2 == '=')
 451         return PLUSEQ;
 452       break;
 453     case '-':
 454       if (c2 == '=')
 455         return MINUSEQ;
 456       break;
 457     case '*':
 458       if (c2 == '=')
 459         return MULTEQ;
 460       break;
 461     case '/':
 462       if (c2 == '=')
 463         return DIVEQ;
 464       break;
 465     case '|':
 466       if (c2 == '=')
 467         return OREQ;
 468       if (c2 == '|')
 469         return OROR;
 470       break;
 471     case '&':
 472       if (c2 == '=')
 473         return ANDEQ;
 474       if (c2 == '&')
 475         return ANDAND;
 476       break;
 477     case '>':
 478       if (c2 == '=')
 479         return GE;
 480       if (c2 == '>')
 481         return RSHIFT;
 482       break;
 483     case '<':
 484       if (c2 == '=')
 485         return LE;
 486       if (c2 == '<')
 487         return LSHIFT;
 488       break;
 489     default:
 490       break;
 491     }
 492   return 0;
 493 }
 494
 495 // If C1 is a valid operator, return the opcode.  Otherwise return 0.
 496
 497 inline int
 498 Lex::one_char_operator(char c1)
 499 {
 500   switch (c1)
 501     {
 502     case '+':
 503     case '-':
 504     case '*':
 505     case '/':
 506     case '%':
 507     case '!':
 508     case '&':
 509     case '|':
 510     case '^':
 511     case '~':
 512     case '<':
 513     case '>':
 514     case '=':
 515     case '?':
 516     case ',':
 517     case '(':
 518     case ')':
 519     case '{':
 520     case '}':
 521     case '[':
 522     case ']':
 523     case ':':
 524     case ';':
 525       return c1;
 526     default:
 527       return 0;
 528     }
 529 }
 530
 531 // Skip a C style comment.  *PP points to just after the "/*".  Return
 532 // false if the comment did not end.
 533
 534 bool
 535 Lex::skip_c_comment(const char** pp)
 536 {
 537   const char* p = *pp;
 538   while (p[0] != '*' || p[1] != '/')
 539     {
 540       if (*p == '\0')
 541         {
 542           *pp = p;
 543           return false;
 544         }
 545
 546       if (*p == '\n')
 547         {
 548           ++this->lineno_;
 549           this->linestart_ = p + 1;
 550         }
 551       ++p;
 552     }
 553
 554   *pp = p + 2;
 555   return true;
 556 }
 557
 558 // Skip a line # comment.  Return false if there was no newline.
 559
 560 bool
 561 Lex::skip_line_comment(const char** pp)
 562 {
 563   const char* p = *pp;
 564   size_t skip = strcspn(p, "\n");
 565   if (p[skip] == '\0')
 566     {
 567       *pp = p + skip;
 568       return false;
 569     }
 570
 571   p += skip + 1;
 572   ++this->lineno_;
 573   this->linestart_ = p;
 574   *pp = p;
 575
 576   return true;
 577 }
 578
 579 // Build a token CLASSIFICATION from all characters that match
 580 // CAN_CONTINUE_FN.  Update *PP.
 581
 582 inline Token
 583 Lex::gather_token(Token::Classification classification,
 584                   bool (*can_continue_fn)(char),
 585                   const char* start,
 586                   const char* match,
 587                   const char **pp)
 588 {
 589   while ((*can_continue_fn)(*match))
 590     ++match;
 591   *pp = match;
 592   return this->make_token(classification,
 593                           std::string(start, match - start),
 594                           start);
 595 }
 596
 597 // Build a token from a quoted string.
 598
 599 Token
 600 Lex::gather_quoted_string(const char** pp)
 601 {
 602   const char* start = *pp;
 603   const char* p = start;
 604   ++p;
 605   size_t skip = strcspn(p, "\"\n");
 606   if (p[skip] != '"')
 607     return this->make_invalid_token(start);
 608   *pp = p + skip + 1;
 609   return this->make_token(Token::TOKEN_STRING,
 610                           std::string(p, skip),
 611                           start);
 612 }
 613
 614 // Return the next token at *PP.  Update *PP.  General guideline: we
 615 // require linker scripts to be simple ASCII.  No unicode linker
 616 // scripts.  In particular we can assume that any '\0' is the end of
 617 // the input.
 618
 619 Token
 620 Lex::get_token(const char** pp)
 621 {
 622   const char* p = *pp;
 623
 624   while (true)
 625     {
 626       if (*p == '\0')
 627         {
 628           *pp = p;
 629           return this->make_eof_token(p);
 630         }
 631
 632       // Skip whitespace quickly.
 633       while (*p == ' ' || *p == '\t')
 634         ++p;
 635
 636       if (*p == '\n')
 637         {
 638           ++p;
 639           ++this->lineno_;
 640           this->linestart_ = p;
 641           continue;
 642         }
 643
 644       // Skip C style comments.
 645       if (p[0] == '/' && p[1] == '*')
 646         {
 647           int lineno = this->lineno_;
 648           int charpos = p - this->linestart_ + 1;
 649
 650           *pp = p + 2;
 651           if (!this->skip_c_comment(pp))
 652             return Token(Token::TOKEN_INVALID, lineno, charpos);
 653           p = *pp;
 654
 655           continue;
 656         }
 657
 658       // Skip line comments.
 659       if (*p == '#')
 660         {
 661           *pp = p + 1;
 662           if (!this->skip_line_comment(pp))
 663             return this->make_eof_token(p);
 664           p = *pp;
 665           continue;
 666         }
 667
 668       // Check for a name.
 669       if (Lex::can_start_name(p[0], p[1]))
 670         return this->gather_token(Token::TOKEN_STRING,
 671                                   Lex::can_continue_name,
 672                                   p, p + 2, pp);
 673
 674       // We accept any arbitrary name in double quotes, as long as it
 675       // does not cross a line boundary.
 676       if (*p == '"')
 677         {
 678           *pp = p;
 679           return this->gather_quoted_string(pp);
 680         }
 681
 682       // Check for a number.
 683
 684       if (Lex::can_start_hex(p[0], p[1], p[2]))
 685         return this->gather_token(Token::TOKEN_INTEGER,
 686                                   Lex::can_continue_hex,
 687                                   p, p + 3, pp);
 688
 689       if (Lex::can_start_number(p[0]))
 690         return this->gather_token(Token::TOKEN_INTEGER,
 691                                   Lex::can_continue_number,
 692                                   p, p + 1, pp);
 693
 694       // Check for operators.
 695
 696       int opcode = Lex::three_char_operator(p[0], p[1], p[2]);
 697       if (opcode != 0)
 698         {
 699           *pp = p + 3;
 700           return this->make_token(opcode, p);
 701         }
 702
 703       opcode = Lex::two_char_operator(p[0], p[1]);
 704       if (opcode != 0)
 705         {
 706           *pp = p + 2;
 707           return this->make_token(opcode, p);
 708         }
 709
 710       opcode = Lex::one_char_operator(p[0]);
 711       if (opcode != 0)
 712         {
 713           *pp = p + 1;
 714           return this->make_token(opcode, p);
 715         }
 716
 717       return this->make_token(Token::TOKEN_INVALID, p);
 718     }
 719 }
 720
 721 // Tokenize the file.  Return the final token.
 722
 723 Token
 724 Lex::tokenize()
 725 {
 726   std::string contents;
 727   this->read_file(&contents);
 728
 729   const char* p = contents.c_str();
 730
 731   this->lineno_ = 1;
 732   this->linestart_ = p;
 733
 734   while (true)
 735     {
 736       Token t(this->get_token(&p));
 737
 738       // Don't let an early null byte fool us into thinking that we've
 739       // reached the end of the file.
 740       if (t.is_eof()
 741           && static_cast<size_t>(p - contents.c_str()) < contents.length())
 742         t = this->make_invalid_token(p);
 743
 744       if (t.is_invalid() || t.is_eof())
 745         return t;
 746
 747       this->tokens_.push_back(t);
 748     }
 749 }
 750
 751 // A trivial task which waits for THIS_BLOCKER to be clear and then
 752 // clears NEXT_BLOCKER.  THIS_BLOCKER may be NULL.
 753
 754 class Script_unblock : public Task
 755 {
 756  public:
 757   Script_unblock(Task_token* this_blocker, Task_token* next_blocker)
 758     : this_blocker_(this_blocker), next_blocker_(next_blocker)
 759   { }
 760
 761   ~Script_unblock()
 762   {
 763     if (this->this_blocker_ != NULL)
 764       delete this->this_blocker_;
 765   }
 766
 767   Is_runnable_type
 768   is_runnable(Workqueue*)
 769   {
 770     if (this->this_blocker_ != NULL && this->this_blocker_->is_blocked())
 771       return IS_BLOCKED;
 772     return IS_RUNNABLE;
 773   }
 774
 775   Task_locker*
 776   locks(Workqueue* workqueue)
 777   {
 778     return new Task_locker_block(*this->next_blocker_, workqueue);
 779   }
 780
 781   void
 782   run(Workqueue*)
 783   { }
 784
 785  private:
 786   Task_token* this_blocker_;
 787   Task_token* next_blocker_;
 788 };
 789
 790 // This class holds data passed through the parser to the lexer and to
 791 // the parser support functions.  This avoids global variables.  We
 792 // can't use global variables because we need not be called in the
 793 // main thread.
 794
 795 class Parser_closure
 796 {
 797  public:
 798   Parser_closure(const char* filename,
 799                  const Position_dependent_options& posdep_options,
 800                  bool in_group,
 801                  const Lex::Token_sequence* tokens)
 802     : filename_(filename), posdep_options_(posdep_options),
 803       in_group_(in_group), tokens_(tokens),
 804       next_token_index_(0), inputs_(NULL)
 805   { }
 806
 807   // Return the file name.
 808   const char*
 809   filename() const
 810   { return this->filename_; }
 811
 812   // Return the position dependent options.  The caller may modify
 813   // this.
 814   Position_dependent_options&
 815   position_dependent_options()
 816   { return this->posdep_options_; }
 817
 818   // Return whether this script is being run in a group.
 819   bool
 820   in_group() const
 821   { return this->in_group_; }
 822
 823   // Whether we are at the end of the token list.
 824   bool
 825   at_eof() const
 826   { return this->next_token_index_ >= this->tokens_->size(); }
 827
 828   // Return the next token.
 829   const Token*
 830   next_token()
 831   {
 832     const Token* ret = &(*this->tokens_)[this->next_token_index_];
 833     ++this->next_token_index_;
 834     return ret;
 835   }
 836
 837   // Return the list of input files, creating it if necessary.  This
 838   // is a space leak--we never free the INPUTS_ pointer.
 839   Input_arguments*
 840   inputs()
 841   {
 842     if (this->inputs_ == NULL)
 843       this->inputs_ = new Input_arguments();
 844     return this->inputs_;
 845   }
 846
 847   // Return whether we saw any input files.
 848   bool
 849   saw_inputs() const
 850   { return this->inputs_ != NULL && !this->inputs_->empty(); }
 851
 852  private:
 853   // The name of the file we are reading.
 854   const char* filename_;
 855   // The position dependent options.
 856   Position_dependent_options posdep_options_;
 857   // Whether we are currently in a --start-group/--end-group.
 858   bool in_group_;
 859
 860   // The tokens to be returned by the lexer.
 861   const Lex::Token_sequence* tokens_;
 862   // The index of the next token to return.
 863   unsigned int next_token_index_;
 864   // New input files found to add to the link.
 865   Input_arguments* inputs_;
 866 };
 867
 868 // FILE was found as an argument on the command line.  Try to read it
 869 // as a script.  We've already read BYTES of data into P, but we
 870 // ignore that.  Return true if the file was handled.
 871
 872 bool
 873 read_input_script(Workqueue* workqueue, const General_options& options,
 874                   Symbol_table* symtab, Layout* layout,
 875                   const Dirsearch& dirsearch, Input_objects* input_objects,
 876                   Input_group* input_group,
 877                   const Input_argument* input_argument,
 878                   Input_file* input_file, const unsigned char*, off_t,
 879                   Task_token* this_blocker, Task_token* next_blocker)
 880 {
 881   Lex lex(input_file);
 882   if (lex.tokenize().is_invalid())
 883     return false;
 884
 885   Parser_closure closure(input_file->filename().c_str(),
 886                          input_argument->file().options(),
 887                          input_group != NULL,
 888                          &lex.tokens());
 889
 890   if (yyparse(&closure) != 0)
 891     return false;
 892
 893   // THIS_BLOCKER must be clear before we may add anything to the
 894   // symbol table.  We are responsible for unblocking NEXT_BLOCKER
 895   // when we are done.  We are responsible for deleting THIS_BLOCKER
 896   // when it is unblocked.
 897
 898   if (!closure.saw_inputs())
 899     {
 900       // The script did not add any files to read.  Note that we are
 901       // not permitted to call NEXT_BLOCKER->unblock() here even if
 902       // THIS_BLOCKER is NULL, as we are not in the main thread.
 903       workqueue->queue(new Script_unblock(this_blocker, next_blocker));
 904       return true;
 905     }
 906
 907   for (Input_arguments::const_iterator p = closure.inputs()->begin();
 908        p != closure.inputs()->end();
 909        ++p)
 910     {
 911       Task_token* nb;
 912       if (p + 1 == closure.inputs()->end())
 913         nb = next_blocker;
 914       else
 915         {
 916           nb = new Task_token();
 917           nb->add_blocker();
 918         }
 919       workqueue->queue(new Read_symbols(options, input_objects, symtab,
 920                                         layout, dirsearch, &*p,
 921                                         input_group, this_blocker, nb));
 922       this_blocker = nb;
 923     }
 924
 925   return true;
 926 }
 927
 928 // Manage mapping from keywords to the codes expected by the bison
 929 // parser.
 930
 931 class Keyword_to_parsecode
 932 {
 933  public:
 934   // The structure which maps keywords to parsecodes.
 935   struct Keyword_parsecode
 936   {
 937     // Keyword.
 938     const char* keyword;
 939     // Corresponding parsecode.
 940     int parsecode;
 941   };
 942
 943   // Return the parsecode corresponding KEYWORD, or 0 if it is not a
 944   // keyword.
 945   static int
 946   keyword_to_parsecode(const char* keyword);
 947
 948  private:
 949   // The array of all keywords.
 950   static const Keyword_parsecode keyword_parsecodes_[];
 951
 952   // The number of keywords.
 953   static const int keyword_count;
 954 };
 955
 956 // Mapping from keyword string to keyword parsecode.  This array must
 957 // be kept in sorted order.  Parsecodes are looked up using bsearch.
 958 // This array must correspond to the list of parsecodes in yyscript.y.
 959
 960 const Keyword_to_parsecode::Keyword_parsecode
 961 Keyword_to_parsecode::keyword_parsecodes_[] =
 962 {
 963   { "ABSOLUTE", ABSOLUTE },
 964   { "ADDR", ADDR },
 965   { "ALIGN", ALIGN_K },
 966   { "ASSERT", ASSERT_K },
 967   { "AS_NEEDED", AS_NEEDED },
 968   { "AT", AT },
 969   { "BIND", BIND },
 970   { "BLOCK", BLOCK },
 971   { "BYTE", BYTE },
 972   { "CONSTANT", CONSTANT },
 973   { "CONSTRUCTORS", CONSTRUCTORS },
 974   { "COPY", COPY },
 975   { "CREATE_OBJECT_SYMBOLS", CREATE_OBJECT_SYMBOLS },
 976   { "DATA_SEGMENT_ALIGN", DATA_SEGMENT_ALIGN },
 977   { "DATA_SEGMENT_END", DATA_SEGMENT_END },
 978   { "DATA_SEGMENT_RELRO_END", DATA_SEGMENT_RELRO_END },
 979   { "DEFINED", DEFINED },
 980   { "DSECT", DSECT },
 981   { "ENTRY", ENTRY },
 982   { "EXCLUDE_FILE", EXCLUDE_FILE },
 983   { "EXTERN", EXTERN },
 984   { "FILL", FILL },
 985   { "FLOAT", FLOAT },
 986   { "FORCE_COMMON_ALLOCATION", FORCE_COMMON_ALLOCATION },
 987   { "GROUP", GROUP },
 988   { "HLL", HLL },
 989   { "INCLUDE", INCLUDE },
 990   { "INFO", INFO },
 991   { "INHIBIT_COMMON_ALLOCATION", INHIBIT_COMMON_ALLOCATION },
 992   { "INPUT", INPUT },
 993   { "KEEP", KEEP },
 994   { "LENGTH", LENGTH },
 995   { "LOADADDR", LOADADDR },
 996   { "LONG", LONG },
 997   { "MAP", MAP },
 998   { "MAX", MAX_K },
 999   { "MEMORY", MEMORY },
1000   { "MIN", MIN_K },
1001   { "NEXT", NEXT },
1002   { "NOCROSSREFS", NOCROSSREFS },
1003   { "NOFLOAT", NOFLOAT },
1004   { "NOLOAD", NOLOAD },
1005   { "ONLY_IF_RO", ONLY_IF_RO },
1006   { "ONLY_IF_RW", ONLY_IF_RW },
1007   { "ORIGIN", ORIGIN },
1008   { "OUTPUT", OUTPUT },
1009   { "OUTPUT_ARCH", OUTPUT_ARCH },
1010   { "OUTPUT_FORMAT", OUTPUT_FORMAT },
1011   { "OVERLAY", OVERLAY },
1012   { "PHDRS", PHDRS },
1013   { "PROVIDE", PROVIDE },
1014   { "PROVIDE_HIDDEN", PROVIDE_HIDDEN },
1015   { "QUAD", QUAD },
1016   { "SEARCH_DIR", SEARCH_DIR },
1017   { "SECTIONS", SECTIONS },
1018   { "SEGMENT_START", SEGMENT_START },
1019   { "SHORT", SHORT },
1020   { "SIZEOF", SIZEOF },
1021   { "SIZEOF_HEADERS", SIZEOF_HEADERS },
1022   { "SORT_BY_ALIGNMENT", SORT_BY_ALIGNMENT },
1023   { "SORT_BY_NAME", SORT_BY_NAME },
1024   { "SPECIAL", SPECIAL },
1025   { "SQUAD", SQUAD },
1026   { "STARTUP", STARTUP },
1027   { "SUBALIGN", SUBALIGN },
1028   { "SYSLIB", SYSLIB },
1029   { "TARGET", TARGET_K },
1030   { "TRUNCATE", TRUNCATE },
1031   { "VERSION", VERSIONK },
1032   { "global", GLOBAL },
1033   { "l", LENGTH },
1034   { "len", LENGTH },
1035   { "local", LOCAL },
1036   { "o", ORIGIN },
1037   { "org", ORIGIN },
1038   { "sizeof_headers", SIZEOF_HEADERS },
1039 };
1040
1041 const int Keyword_to_parsecode::keyword_count =
1042   (sizeof(Keyword_to_parsecode::keyword_parsecodes_)
1043    / sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]));
1044
1045 // Comparison function passed to bsearch.
1046
1047 extern "C"
1048 {
1049
1050 static int
1051 ktt_compare(const void* keyv, const void* kttv)
1052 {
1053   const char* key = static_cast<const char*>(keyv);
1054   const Keyword_to_parsecode::Keyword_parsecode* ktt =
1055     static_cast<const Keyword_to_parsecode::Keyword_parsecode*>(kttv);
1056   return strcmp(key, ktt->keyword);
1057 }
1058
1059 } // End extern "C".
1060
1061 int
1062 Keyword_to_parsecode::keyword_to_parsecode(const char* keyword)
1063 {
1064   void* kttv = bsearch(keyword,
1065                        Keyword_to_parsecode::keyword_parsecodes_,
1066                        Keyword_to_parsecode::keyword_count,
1067                        sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]),
1068                        ktt_compare);
1069   if (kttv == NULL)
1070     return 0;
1071   Keyword_parsecode* ktt = static_cast<Keyword_parsecode*>(kttv);
1072   return ktt->parsecode;
1073 }
1074
1075 } // End namespace gold.
1076
1077 // The remaining functions are extern "C", so it's clearer to not put
1078 // them in namespace gold.
1079
1080 using namespace gold;
1081
1082 // This function is called by the bison parser to return the next
1083 // token.
1084
1085 extern "C" int
1086 yylex(YYSTYPE* lvalp, void* closurev)
1087 {
1088   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1089
1090   if (closure->at_eof())
1091     return 0;
1092
1093   const Token* token = closure->next_token();
1094
1095   switch (token->classification())
1096     {
1097     default:
1098     case Token::TOKEN_INVALID:
1099     case Token::TOKEN_EOF:
1100       abort();
1101
1102     case Token::TOKEN_STRING:
1103       {
1104         const char* str = token->string_value().c_str();
1105         int parsecode = Keyword_to_parsecode::keyword_to_parsecode(str);
1106         if (parsecode != 0)
1107           return parsecode;
1108         lvalp->string = str;
1109         return STRING;
1110       }
1111
1112     case Token::TOKEN_OPERATOR:
1113       return token->operator_value();
1114
1115     case Token::TOKEN_INTEGER:
1116       lvalp->integer = token->integer_value();
1117       return INTEGER;
1118     }
1119 }
1120
1121 // This function is called by the bison parser to report an error.
1122
1123 extern "C" void
1124 yyerror(void* closurev, const char* message)
1125 {
1126   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1127
1128   fprintf(stderr, _("%s: %s: %s\n"),
1129           program_name, closure->filename(), message);
1130   gold_exit(false);
1131 }
1132
1133 // Called by the bison parser to add a file to the link.
1134
1135 extern "C" void
1136 script_add_file(void* closurev, const char* name)
1137 {
1138   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1139   Input_file_argument file(name, false, closure->position_dependent_options());
1140   closure->inputs()->add_file(file);
1141 }
1142
1143 // Called by the bison parser to start a group.  If we are already in
1144 // a group, that means that this script was invoked within a
1145 // --start-group --end-group sequence on the command line, or that
1146 // this script was found in a GROUP of another script.  In that case,
1147 // we simply continue the existing group, rather than starting a new
1148 // one.  It is possible to construct a case in which this will do
1149 // something other than what would happen if we did a recursive group,
1150 // but it's hard to imagine why the different behaviour would be
1151 // useful for a real program.  Avoiding recursive groups is simpler
1152 // and more efficient.
1153
1154 extern "C" void
1155 script_start_group(void* closurev)
1156 {
1157   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1158   if (!closure->in_group())
1159     closure->inputs()->start_group();
1160 }
1161
1162 // Called by the bison parser at the end of a group.
1163
1164 extern "C" void
1165 script_end_group(void* closurev)
1166 {
1167   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1168   if (!closure->in_group())
1169     closure->inputs()->end_group();
1170 }
1171
1172 // Called by the bison parser to start an AS_NEEDED list.
1173
1174 extern "C" void
1175 script_start_as_needed(void* closurev)
1176 {
1177   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1178   closure->position_dependent_options().set_as_needed();
1179 }
1180
1181 // Called by the bison parser at the end of an AS_NEEDED list.
1182
1183 extern "C" void
1184 script_end_as_needed(void* closurev)
1185 {
1186   Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1187   closure->position_dependent_options().clear_as_needed();
1188 }