gnu/dist/gcc4/libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "internal.h"
  26
   /* Token spelling tables.

      Each token type has an entry in token_spellings[] giving a spelling
      category and a name string.  The entries are generated from
      TTYPE_TABLE by the OP and TK macros defined just below.  */

      /* Spelling category stored in the CATEGORY field of struct
         token_spelling.  Entries produced by OP() always use
         SPELL_OPERATOR; entries produced by TK() use whichever SPELL_*
         suffix the table row supplies.
         NOTE(review): how SPELL_IDENT, SPELL_LITERAL and SPELL_NONE are
         interpreted is decided by the code that reads TOKEN_SPELL, which
         is not part of these declarations -- confirm there.  */
   27 enum spell_type
   28 {
   29   SPELL_OPERATOR = 0,
   30   SPELL_IDENT,
   31   SPELL_LITERAL,
   32   SPELL_NONE
   33 };
   34
      /* One row of the spelling table: the category plus a name string.  */
   35 struct token_spelling
   36 {
   37   enum spell_type category;
   38   const unsigned char *name;
   39 };
   40
      /* Spellings of the six digraphs.
         NOTE(review): the index used to select an entry is computed by the
         users of this array; verify the ordering against them before
         reordering anything here.  */
   41 static const unsigned char *const digraph_spellings[] =
   42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
   43
      /* OP(e, s): operator row; the name is the operator text S.
         TK(e, s): other row; the category is SPELL_<s> and the name is the
         stringified enumerator E.  Both macros are only needed while
         TTYPE_TABLE is expanded and are undefined again right after.  */
   44 #define OP(e, s) { SPELL_OPERATOR, U s  },
   45 #define TK(e, s) { SPELL_ ## s,    U #e },
   46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
   47 #undef OP
   48 #undef TK
   49
      /* Accessors: look up the table row using the TYPE field of the
         cpp_token that TOKEN points to.  */
   50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
   51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
   /* Prototypes for file-local (static) helpers, so that they can be
      used before their definitions.  */
   53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
   54 static int skip_line_comment (cpp_reader *);
   55 static void skip_whitespace (cpp_reader *, cppchar_t);
   56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
   57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
   58 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
   59                             unsigned int, enum cpp_ttype);
   60 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
   61 static int name_p (cpp_reader *, const cpp_string *);
   62 static tokenrun *next_tokenrun (tokenrun *);
   63
      /* Buffer allocation helper; takes a size in bytes (size_t).  */
   64 static _cpp_buff *new_buff (size_t);
  65
  66
  67 /* Utility routine:
  68
  69    Compares, the token TOKEN to the NUL-terminated string STRING.
  70    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  71 int
  72 cpp_ideq (const cpp_token *token, const char *string)
  73 {
  74   if (token->type != CPP_NAME)
  75     return 0;
  76
  77   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  78 }
  79
  80 /* Record a note TYPE at byte POS into the current cleaned logical
  81    line.  */
  82 static void
  83 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  84 {
  85   if (buffer->notes_used == buffer->notes_cap)
  86     {
  87       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  88       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
  89                                   buffer->notes_cap);
  90     }
  91
  92   buffer->notes[buffer->notes_used].pos = pos;
  93   buffer->notes[buffer->notes_used].type = type;
  94   buffer->notes_used++;
  95 }
  96
   97 /* Returns with a logical line that contains no escaped newlines or
   98    trigraphs.  This is a time-critical inner loop.  */
      /* Operates on pfile->buffer, starting at buffer->next_line.  On
         return buffer->cur and buffer->line_base point at the start of
         the line, the cleaned line ends in a '\n' stored in place,
         buffer->next_line points just past the input consumed, and
         buffer->notes holds one note per escaped newline or trigraph seen
         followed by a sentinel note.

         Two pointers are used: S reads the raw input, D is the write
         position of the cleaned text.  Nothing is written until an
         escaped newline or a converted trigraph makes the two diverge.  */
   99 void
  100 _cpp_clean_line (cpp_reader *pfile)
  101 {
  102   cpp_buffer *buffer;
  103   const uchar *s;
  104   uchar c, *d, *p;
  105
  106   buffer = pfile->buffer;
        /* Start a fresh set of notes for this logical line.  */
  107   buffer->cur_note = buffer->notes_used = 0;
  108   buffer->cur = buffer->line_base = buffer->next_line;
  109   buffer->need_line = false;
        /* One before the first character: the loops below pre-increment.  */
  110   s = buffer->next_line - 1;
  111
  112   if (!buffer->from_stage3)
  113     {
  114       /* Short circuit for the common case of an un-escaped line with
  115          no trigraphs.  The primary win here is by not writing any
  116          data back to memory until we have to.  */
  117       for (;;)
  118         {
  119           c = *++s;
  120           if (c == '\n' || c == '\r')
  121             {
  122               d = (uchar *) s;
  123
                    /* A terminator located at buffer->rlimit ends the
                       scan; no escaped newline is looked for there.  */
  124               if (s == buffer->rlimit)
  125                 goto done;
  126
  127               /* DOS line ending? */
  128               if (c == '\r' && s[1] == '\n')
  129                 s++;
  130
  131               if (s == buffer->rlimit)
  132                 goto done;
  133
  134               /* check for escaped newline */
  135               p = d;
                    /* Step back over non-vertical space before the
                       terminator, but never past buffer->next_line.  */
  136               while (p != buffer->next_line && is_nvspace (p[-1]))
  137                 p--;
  138               if (p == buffer->next_line || p[-1] != '\\')
  139                 goto done;
  140
  141               /* Have an escaped newline; process it and proceed to
  142                  the slow path.  */
                    /* The note sits on the backslash.  Its type is ' ' when
                       space separated the backslash from the newline and
                       '\\' otherwise.  */
  143               add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
                    /* D is placed so that the first "*++d" of the slow loop
                       overwrites the backslash.  next_line is moved to the
                       backslash so later backward scans stop there; it
                       gets its final value at `done'.  */
  144               d = p - 2;
  145               buffer->next_line = p - 1;
  146               break;
  147             }
  148           if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
  149             {
  150               /* Have a trigraph.  We may or may not have to convert
  151                  it.  Add a line note regardless, for -Wtrigraphs.  */
  152               add_line_note (buffer, s, s[2]);
  153               if (CPP_OPTION (pfile, trigraphs))
  154                 {
  155                   /* We do, and that means we have to switch to the
  156                      slow path.  */
                        /* Replace the first '?' in place with the mapped
                           character and skip the other two characters.  */
  157                   d = (uchar *) s;
  158                   *d = _cpp_trigraph_map[s[2]];
  159                   s += 2;
  160                   break;
  161                 }
  162             }
  163         }
  164
  165
            /* Slow path: copy every character from S down to D, handling
               any further line endings and trigraphs on the way.  */
  166       for (;;)
  167         {
  168           c = *++s;
  169           *++d = c;
  170
  171           if (c == '\n' || c == '\r')
  172             {
  173                   /* Handle DOS line endings.  */
  174               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
  175                 s++;
  176               if (s == buffer->rlimit)
  177                 break;
  178
  179               /* Escaped?  */
                    /* Same backward scan as in the fast path, but over the
                       already-cleaned text ending at D.  */
  180               p = d;
  181               while (p != buffer->next_line && is_nvspace (p[-1]))
  182                 p--;
  183               if (p == buffer->next_line || p[-1] != '\\')
  184                 break;
  185
  186               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
  187               d = p - 2;
  188               buffer->next_line = p - 1;
  189             }
  190           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
  191             {
  192               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
                    /* Here the note position is D, i.e. a position in the
                       cleaned text rather than in the raw input.  */
  193               add_line_note (buffer, d, s[2]);
  194               if (CPP_OPTION (pfile, trigraphs))
  195                 {
                        /* Overwrite the '?' just copied and skip the two
                           remaining characters of the trigraph.  */
  196                   *d = _cpp_trigraph_map[s[2]];
  197                   s += 2;
  198                 }
  199             }
  200         }
  201     }
  202   else
  203     {
            /* from_stage3 input: only the end of the line is located; no
               escaped newline or trigraph processing is done.  */
  204       do
  205         s++;
  206       while (*s != '\n' && *s != '\r');
  207       d = (uchar *) s;
  208
  209       /* Handle DOS line endings.  */
  210       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
  211         s++;
  212     }
  213
  214  done:
        /* Whatever the original terminator was, the cleaned line ends in
           a single '\n'.  */
  215   *d = '\n';
  216   /* A sentinel note that should never be processed.  */
  217   add_line_note (buffer, d + 1, '\n');
  218   buffer->next_line = s + 1;
  219 }
 220
 221 /* Return true if the trigraph indicated by NOTE should be warned
 222    about in a comment.  */
 223 static bool
 224 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 225 {
 226   const uchar *p;
 227
 228   /* Within comments we don't warn about trigraphs, unless the
 229      trigraph forms an escaped newline, as that may change
 230      behavior.  */
 231   if (note->type != '/')
 232     return false;
 233
 234   /* If -trigraphs, then this was an escaped newline iff the next note
 235      is coincident.  */
 236   if (CPP_OPTION (pfile, trigraphs))
 237     return note[1].pos == note->pos;
 238
 239   /* Otherwise, see if this forms an escaped newline.  */
 240   p = note->pos + 3;
 241   while (is_nvspace (*p))
 242     p++;
 243
 244   /* There might have been escaped newlines between the trigraph and the
 245      newline we found.  Hence the position test.  */
 246   return (*p == '\n' && p < note[1].pos);
 247 }
 248
 249 /* Process the notes created by add_line_note as far as the current
 250    location.  */
 251 void
 252 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 253 {
 254   cpp_buffer *buffer = pfile->buffer;
 255
 256   for (;;)
 257     {
 258       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 259       unsigned int col;
 260
 261       if (note->pos > buffer->cur)
 262         break;
 263
 264       buffer->cur_note++;
 265       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 266
 267       if (note->type == '\\' || note->type == ' ')
 268         {
 269           if (note->type == ' ' && !in_comment)
 270             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 271                                  "backslash and newline separated by space");
 272
 273           if (buffer->next_line > buffer->rlimit)
 274             {
 275               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
 276                                    "backslash-newline at end of file");
 277               /* Prevent "no newline at end of file" warning.  */
 278               buffer->next_line = buffer->rlimit;
 279             }
 280
 281           buffer->line_base = note->pos;
 282           CPP_INCREMENT_LINE (pfile, 0);
 283         }
 284       else if (_cpp_trigraph_map[note->type])
 285         {
 286           if (CPP_OPTION (pfile, warn_trigraphs)
 287               && (!in_comment || warn_in_comment (pfile, note)))
 288             {
 289               if (CPP_OPTION (pfile, trigraphs))
 290                 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 291                                      "trigraph ??%c converted to %c",
 292                                      note->type,
 293                                      (int) _cpp_trigraph_map[note->type]);
 294               else
 295                 {
 296                   cpp_error_with_line
 297                     (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 298                      "trigraph ??%c ignored, use -trigraphs to enable",
 299                      note->type);
 300                 }
 301             }
 302         }
 303       else
 304         abort ();
 305     }
 306 }
 307
  308 /* Skip a C-style block comment.  We find the end of the comment by
  309    seeing if an asterisk is before every '/' we encounter.  Returns
  310    nonzero if comment terminated by EOF, zero otherwise.
  311
  312    Buffer->cur points to the initial asterisk of the comment.  */
      /* When zero is returned, buffer->cur is left just past the closing
         '/'.  When nonzero is returned, buffer->cur is left on the newline
         of the last line read.  Line notes are processed with the
         in-comment flag set at every newline and once more at the end.  */
  313 bool
  314 _cpp_skip_block_comment (cpp_reader *pfile)
  315 {
  316   cpp_buffer *buffer = pfile->buffer;
  317   const uchar *cur = buffer->cur;
  318   uchar c;
  319
        /* Step over the opening asterisk.  A '/' that follows directly is
           stepped over as well, so the "previous character is an asterisk"
           test below cannot pair it with the opening asterisk.  */
  320   cur++;
  321   if (*cur == '/')
  322     cur++;
  323
  324   for (;;)
  325     {
  326       /* People like decorating comments with '*', so check for '/'
  327          instead for efficiency.  */
  328       c = *cur++;
  329
  330       if (c == '/')
  331         {
                /* CUR is already past the '/', so cur[-2] is the character
                   in front of it.  */
  332           if (cur[-2] == '*')
  333             break;
  334
  335           /* Warn about potential nested comments, but not if the '/'
  336              comes immediately before the true comment delimiter.
  337              Don't bother to get it right across escaped newlines.  */
  338           if (CPP_OPTION (pfile, warn_comments)
  339               && cur[0] == '*' && cur[1] != '/')
  340             {
                    /* buffer->cur is updated first because the column of
                       the diagnostic is taken from it.  */
  341               buffer->cur = cur;
  342               cpp_error_with_line (pfile, CPP_DL_WARNING,
  343                                    pfile->line_table->highest_line, CPP_BUF_COL (buffer),
  344                                    "\"/*\" within comment");
  345             }
  346         }
  347       else if (c == '\n')
  348         {
  349           unsigned int cols;
                /* Leave buffer->cur on the newline while the notes of the
                   finished line are processed.  */
  350           buffer->cur = cur - 1;
  351           _cpp_process_line_notes (pfile, true);
                /* No further input: the comment is unterminated.  */
  352           if (buffer->next_line >= buffer->rlimit)
  353             return true;
  354           _cpp_clean_line (pfile);
  355
                /* Advance the line counter, passing the extent of the line
                   just loaded as the second argument.  */
  356           cols = buffer->next_line - buffer->line_base;
  357           CPP_INCREMENT_LINE (pfile, cols);
  358
                /* Resume scanning at the start of the new line.  */
  359           cur = buffer->cur;
  360         }
  361     }
  362
  363   buffer->cur = cur;
  364   _cpp_process_line_notes (pfile, true);
  365   return false;
  366 }
 367
 368 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 369    terminating newline.  Handles escaped newlines.  Returns nonzero
 370    if a multiline comment.  */
 371 static int
 372 skip_line_comment (cpp_reader *pfile)
 373 {
 374   cpp_buffer *buffer = pfile->buffer;
 375   unsigned int orig_line = pfile->line_table->highest_line;
 376
 377   while (*buffer->cur != '\n')
 378     buffer->cur++;
 379
 380   _cpp_process_line_notes (pfile, true);
 381   return orig_line != pfile->line_table->highest_line;
 382 }
 383
 384 /* Skips whitespace, saving the next non-whitespace character.  */
 385 static void
 386 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 387 {
 388   cpp_buffer *buffer = pfile->buffer;
 389   bool saw_NUL = false;
 390
 391   do
 392     {
 393       /* Horizontal space always OK.  */
 394       if (c == ' ' || c == '\t')
 395         ;
 396       /* Just \f \v or \0 left.  */
 397       else if (c == '\0')
 398         saw_NUL = true;
 399       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 400         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 401                              CPP_BUF_COL (buffer),
 402                              "%s in preprocessing directive",
 403                              c == '\f' ? "form feed" : "vertical tab");
 404
 405       c = *buffer->cur++;
 406     }
 407   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 408   while (is_nvspace (c));
 409
 410   if (saw_NUL)
 411     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 412
 413   buffer->cur--;
 414 }
 415
 416 /* See if the characters of a number token are valid in a name (no
 417    '.', '+' or '-').  */
 418 static int
 419 name_p (cpp_reader *pfile, const cpp_string *string)
 420 {
 421   unsigned int i;
 422
 423   for (i = 0; i < string->len; i++)
 424     if (!is_idchar (string->text[i]))
 425       return 0;
 426
 427   return 1;
 428 }
 429
 430 /* After parsing an identifier or other sequence, produce a warning about
 431    sequences not in NFC/NFKC.  */
 432 static void
 433 warn_about_normalization (cpp_reader *pfile,
 434                           const cpp_token *token,
 435                           const struct normalize_state *s)
 436 {
 437   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
 438       && !pfile->state.skipping)
 439     {
 440       /* Make sure that the token is printed using UCNs, even
 441          if we'd otherwise happily print UTF-8.  */
 442       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
 443       size_t sz;
 444
 445       sz = cpp_spell_token (pfile, token, buf, false) - buf;
 446       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
 447         cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
 448                              "`%.*s' is not in NFKC", (int) sz, buf);
 449       else
 450         cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
 451                              "`%.*s' is not in NFC", (int) sz, buf);
 452     }
 453 }
 454
 455 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 456    an identifier.  FIRST is TRUE if this starts an identifier.  */
 457 static bool
 458 forms_identifier_p (cpp_reader *pfile, int first,
 459                     struct normalize_state *state)
 460 {
 461   cpp_buffer *buffer = pfile->buffer;
 462
 463   if (*buffer->cur == '$')
 464     {
 465       if (!CPP_OPTION (pfile, dollars_in_ident))
 466         return false;
 467
 468       buffer->cur++;
 469       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 470         {
 471           CPP_OPTION (pfile, warn_dollars) = 0;
 472           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 473         }
 474
 475       return true;
 476     }
 477
 478   /* Is this a syntactically valid UCN?  */
 479   if (CPP_OPTION (pfile, extended_identifiers)
 480       && *buffer->cur == '\\'
 481       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 482     {
 483       buffer->cur += 2;
 484       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
 485                           state))
 486         return true;
 487       buffer->cur -= 2;
 488     }
 489
 490   return false;
 491 }
 492
 493 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 494 static cpp_hashnode *
 495 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
 496                 struct normalize_state *nst)
 497 {
 498   cpp_hashnode *result;
 499   const uchar *cur;
 500   unsigned int len;
 501   unsigned int hash = HT_HASHSTEP (0, *base);
 502
 503   cur = pfile->buffer->cur;
 504   if (! starts_ucn)
 505     while (ISIDNUM (*cur))
 506       {
 507         hash = HT_HASHSTEP (hash, *cur);
 508         cur++;
 509       }
 510   pfile->buffer->cur = cur;
 511   if (starts_ucn || forms_identifier_p (pfile, false, nst))
 512     {
 513       /* Slower version for identifiers containing UCNs (or $).  */
 514       do {
 515         while (ISIDNUM (*pfile->buffer->cur))
 516           {
 517             pfile->buffer->cur++;
 518             NORMALIZE_STATE_UPDATE_IDNUM (nst);
 519           }
 520       } while (forms_identifier_p (pfile, false, nst));
 521       result = _cpp_interpret_identifier (pfile, base,
 522                                           pfile->buffer->cur - base);
 523     }
 524   else
 525     {
 526       len = cur - base;
 527       hash = HT_HASHFINISH (hash, len);
 528
 529       result = (cpp_hashnode *)
 530         ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
 531     }
 532
 533   /* Rarely, identifiers require diagnostics when lexed.  */
 534   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 535                         && !pfile->state.skipping, 0))
 536     {
 537       /* It is allowed to poison the same identifier twice.  */
 538       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 539         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 540                    NODE_NAME (result));
 541
 542       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 543          replacement list of a variadic macro.  */
 544       if (result == pfile->spec_nodes.n__VA_ARGS__
 545           && !pfile->state.va_args_ok)
 546         cpp_error (pfile, CPP_DL_PEDWARN,
 547                    "__VA_ARGS__ can only appear in the expansion"
 548                    " of a C99 variadic macro");
 549     }
 550
 551   return result;
 552 }
 553
 554 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 555 static void
 556 lex_number (cpp_reader *pfile, cpp_string *number,
 557             struct normalize_state *nst)
 558 {
 559   const uchar *cur;
 560   const uchar *base;
 561   uchar *dest;
 562
 563   base = pfile->buffer->cur - 1;
 564   do
 565     {
 566       cur = pfile->buffer->cur;
 567
 568       /* N.B. ISIDNUM does not include $.  */
 569       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 570         {
 571           cur++;
 572           NORMALIZE_STATE_UPDATE_IDNUM (nst);
 573         }
 574
 575       pfile->buffer->cur = cur;
 576     }
 577   while (forms_identifier_p (pfile, false, nst));
 578
 579   number->len = cur - base;
 580   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 581   memcpy (dest, base, number->len);
 582   dest[number->len] = '\0';
 583   number->text = dest;
 584 }
 585
 586 /* Create a token of type TYPE with a literal spelling.  */
 587 static void
 588 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 589                 unsigned int len, enum cpp_ttype type)
 590 {
 591   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 592
 593   memcpy (dest, base, len);
 594   dest[len] = '\0';
 595   token->type = type;
 596   token->val.str.len = len;
 597   token->val.str.text = dest;
 598 }
 599
 600 /* Lexes a string, character constant, or angle-bracketed header file
 601    name.  The stored string contains the spelling, including opening
 602    quote and leading any leading 'L'.  It returns the type of the
 603    literal, or CPP_OTHER if it was not properly terminated.
 604
 605    The spelling is NUL-terminated, but it is not guaranteed that this
 606    is the first NUL since embedded NULs are preserved.  */
 607 static void
 608 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 609 {
 610   bool saw_NUL = false;
 611   const uchar *cur;
 612   cppchar_t terminator;
 613   enum cpp_ttype type;
 614
 615   cur = base;
 616   terminator = *cur++;
 617   if (terminator == 'L')
 618     terminator = *cur++;
 619   if (terminator == '\"')
 620     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 621   else if (terminator == '\'')
 622     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 623   else
 624     terminator = '>', type = CPP_HEADER_NAME;
 625
 626   for (;;)
 627     {
 628       cppchar_t c = *cur++;
 629
 630       /* In #include-style directives, terminators are not escapable.  */
 631       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 632         cur++;
 633       else if (c == terminator)
 634         break;
 635       else if (c == '\n')
 636         {
 637           cur--;
 638           type = CPP_OTHER;
 639           break;
 640         }
 641       else if (c == '\0')
 642         saw_NUL = true;
 643     }
 644
 645   if (saw_NUL && !pfile->state.skipping)
 646     cpp_error (pfile, CPP_DL_WARNING,
 647                "null character(s) preserved in literal");
 648
 649   pfile->buffer->cur = cur;
 650   create_literal (pfile, token, base, cur - base, type);
 651 }
 652
 653 /* The stored comment includes the comment start and any terminator.  */
 654 static void
 655 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 656               cppchar_t type)
 657 {
 658   unsigned char *buffer;
 659   unsigned int len, clen;
 660   int convert_to_c = (pfile->state.in_directive || pfile->state.collecting_args)
 661     && type == '/';
 662
 663   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 664
 665   /* C++ comments probably (not definitely) have moved past a new
 666      line, which we don't want to save in the comment.  */
 667   if (is_vspace (pfile->buffer->cur[-1]))
 668     len--;
 669
 670   /* If we are currently in a directive, then we need to store all
 671      C++ comments as C comments internally, and so we need to
 672      allocate a little extra space in that case.
 673
 674      Note that the only time we encounter a directive here is
 675      when we are saving comments in a "#define".  */
 676   clen = convert_to_c ? len + 2 : len;
 677
 678   buffer = _cpp_unaligned_alloc (pfile, clen);
 679
 680   token->type = CPP_COMMENT;
 681   token->val.str.len = clen;
 682   token->val.str.text = buffer;
 683
 684   buffer[0] = '/';
 685   memcpy (buffer + 1, from, len - 1);
 686
 687   /* Finish conversion to a C comment, if necessary.  */
 688   if (convert_to_c)
 689     {
 690       buffer[1] = '*';
 691       buffer[clen - 2] = '*';
 692       buffer[clen - 1] = '/';
 693     }
 694 }
 695
 696 /* Allocate COUNT tokens for RUN.  */
 697 void
 698 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 699 {
 700   run->base = XNEWVEC (cpp_token, count);
 701   run->limit = run->base + count;
 702   run->next = NULL;
 703 }
 704
 705 /* Returns the next tokenrun, or creates one if there is none.  */
 706 static tokenrun *
 707 next_tokenrun (tokenrun *run)
 708 {
 709   if (run->next == NULL)
 710     {
 711       run->next = XNEW (tokenrun);
 712       run->next->prev = run;
 713       _cpp_init_tokenrun (run->next, 250);
 714     }
 715
 716   return run->next;
 717 }
 718
 719 /* Allocate a single token that is invalidated at the same time as the
 720    rest of the tokens on the line.  Has its line and col set to the
 721    same as the last lexed token, so that diagnostics appear in the
 722    right place.  */
 723 cpp_token *
 724 _cpp_temp_token (cpp_reader *pfile)
 725 {
 726   cpp_token *old, *result;
 727
 728   old = pfile->cur_token - 1;
 729   if (pfile->cur_token == pfile->cur_run->limit)
 730     {
 731       pfile->cur_run = next_tokenrun (pfile->cur_run);
 732       pfile->cur_token = pfile->cur_run->base;
 733     }
 734
 735   result = pfile->cur_token++;
 736   result->src_loc = old->src_loc;
 737   return result;
 738 }
 739
  740 /* Lex a token into RESULT (external interface).  Takes care of issues
  741    like directive handling, token lookahead, multiple include
  742    optimization and skipping.  */
      /* Returns a pointer to the token: either a slot of the current
         token run or, when a directive produced a token of its own,
         &pfile->directive_result.  The loop repeats while tokens have to
         be discarded, i.e. after a directive that produced only padding,
         and while skipping outside a directive until CPP_EOF.  */
  743 const cpp_token *
  744 _cpp_lex_token (cpp_reader *pfile)
  745 {
  746   cpp_token *result;
  747
  748   for (;;)
  749     {
            /* The current token run is full: continue in the next one,
               which next_tokenrun creates if necessary.  */
  750       if (pfile->cur_token == pfile->cur_run->limit)
  751         {
  752           pfile->cur_run = next_tokenrun (pfile->cur_run);
  753           pfile->cur_token = pfile->cur_run->base;
  754         }
  755
            /* While the lookahead count is nonzero, the token already
               stored at cur_token is handed out instead of lexing a new
               one.  */
  756       if (pfile->lookaheads)
  757         {
  758           pfile->lookaheads--;
  759           result = pfile->cur_token++;
  760         }
  761       else
  762         result = _cpp_lex_direct (pfile);
  763
            /* Tokens flagged BOL may introduce a directive.  */
  764       if (result->flags & BOL)
  765         {
  766           /* Is this a directive.  If _cpp_handle_directive returns
  767              false, it is an assembler #.  */
  768           if (result->type == CPP_HASH
  769               /* 6.10.3 p 11: Directives in a list of macro arguments
  770                  gives undefined behavior.  This implementation
  771                  handles the directive as normal.  */
  772               && pfile->state.parsing_args != 1
  773               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
  774             {
                    /* A directive result of type CPP_PADDING means there is
                       nothing to return: lex again.  Any other result is
                       returned in place of the '#' token.  */
  775               if (pfile->directive_result.type == CPP_PADDING)
  776                 continue;
  777               else
  778                 {
  779                   result = &pfile->directive_result;
  780                   break;
  781                 }
  782             }
  783
                /* Not a handled directive: report the line change through
                   the callback, if one is set and we are not skipping.  */
  784           if (pfile->cb.line_change && !pfile->state.skipping)
  785             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
  786         }
  787
  788       /* We don't skip tokens in directives.  */
  789       if (pfile->state.in_directive)
  790         break;
  791
  792       /* Outside a directive, invalidate controlling macros.  At file
  793          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
  794          get here and MI optimization works.  */
  795       pfile->mi_valid = false;
  796
            /* While skipping, only CPP_EOF is passed on to the caller.  */
  797       if (!pfile->state.skipping || result->type == CPP_EOF)
  798         break;
  799     }
  800
  801   return result;
  802 }
 803
 804 /* Returns true if a fresh line has been loaded.  */
 805 bool
 806 _cpp_get_fresh_line (cpp_reader *pfile)
 807 {
 808   int return_at_eof;
 809
 810   /* We can't get a new line until we leave the current directive.  */
 811   if (pfile->state.in_directive)
 812     return false;
 813
 814   for (;;)
 815     {
 816       cpp_buffer *buffer = pfile->buffer;
 817
 818       if (!buffer->need_line)
 819         return true;
 820
 821       if (buffer->next_line < buffer->rlimit)
 822         {
 823           _cpp_clean_line (pfile);
 824           return true;
 825         }
 826
 827       /* First, get out of parsing arguments state.  */
 828       if (pfile->state.parsing_args)
 829         return false;
 830
 831       /* End of buffer.  Non-empty files should end in a newline.  */
 832       if (buffer->buf != buffer->rlimit
 833           && buffer->next_line > buffer->rlimit
 834           && !buffer->from_stage3)
 835         {
 836           /* Only warn once.  */
 837           buffer->next_line = buffer->rlimit;
 838           cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 839                                CPP_BUF_COLUMN (buffer, buffer->cur),
 840                                "no newline at end of file");
 841         }
 842
 843       return_at_eof = buffer->return_at_eof;
 844       _cpp_pop_buffer (pfile);
 845       if (pfile->buffer == NULL || return_at_eof)
 846         return false;
 847     }
 848 }
 849
  /* Token-type helper: if the next input character equals CHAR, consume
     it and set result->type to THEN_TYPE; otherwise set result->type to
     ELSE_TYPE and consume nothing.  Expands to a single statement and
     relies on variables named `result' and `buffer' being in scope
     where it is used.  */
  850 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  851   do                                                    \
  852     {                                                   \
  853       result->type = ELSE_TYPE;                         \
  854       if (*buffer->cur == CHAR)                         \
  855         buffer->cur++, result->type = THEN_TYPE;        \
  856     }                                                   \
  857   while (0)
 858
 859 /* Lex a token into pfile->cur_token, which is also incremented, to
 860    get diagnostics pointing to the correct location.
 861
 862    Does not handle issues such as token lookahead, multiple-include
 863    optimization, directives, skipping etc.  This function is only
 864    suitable for use by _cpp_lex_token, and in special cases like
 865    lex_expansion_token which doesn't care for any of these issues.
 866
 867    When meeting a newline, returns CPP_EOF if parsing a directive,
 868    otherwise returns to the start of the token buffer if permissible.
 869    Returns the location of the lexed token.  */
 870 cpp_token *
 871 _cpp_lex_direct (cpp_reader *pfile)
 872 {
 873   cppchar_t c;
 874   cpp_buffer *buffer;
 875   const unsigned char *comment_start;
 876   cpp_token *result = pfile->cur_token++;
 877
 878  fresh_line:
 879   result->flags = 0;
 880   buffer = pfile->buffer;
 881   if (buffer->need_line)
 882     {
 883       if (!_cpp_get_fresh_line (pfile))
 884         {
 885           result->type = CPP_EOF;
 886           if (!pfile->state.in_directive)
 887             {
 888               /* Tell the compiler the line number of the EOF token.  */
 889               result->src_loc = pfile->line_table->highest_line;
 890               result->flags = BOL;
 891             }
 892           return result;
 893         }
 894       if (!pfile->keep_tokens)
 895         {
 896           pfile->cur_run = &pfile->base_run;
 897           result = pfile->base_run.base;
 898           pfile->cur_token = result + 1;
 899         }
 900       result->flags = BOL;
 901       if (pfile->state.parsing_args == 2)
 902         result->flags |= PREV_WHITE;
 903     }
 904   buffer = pfile->buffer;
 905  update_tokens_line:
 906   result->src_loc = pfile->line_table->highest_line;
 907
 908  skipped_white:
 909   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 910       && !pfile->overlaid_buffer)
 911     {
 912       _cpp_process_line_notes (pfile, false);
 913       result->src_loc = pfile->line_table->highest_line;
 914     }
 915   c = *buffer->cur++;
 916
 917   LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
 918                                CPP_BUF_COLUMN (buffer, buffer->cur));
 919
 920   switch (c)
 921     {
 922     case ' ': case '\t': case '\f': case '\v': case '\0':
 923       result->flags |= PREV_WHITE;
 924       skip_whitespace (pfile, c);
 925       goto skipped_white;
 926
 927     case '\n':
 928       if (buffer->cur < buffer->rlimit)
 929         CPP_INCREMENT_LINE (pfile, 0);
 930       buffer->need_line = true;
 931       goto fresh_line;
 932
 933     case '0': case '1': case '2': case '3': case '4':
 934     case '5': case '6': case '7': case '8': case '9':
 935       {
 936         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
 937         result->type = CPP_NUMBER;
 938         lex_number (pfile, &result->val.str, &nst);
 939         warn_about_normalization (pfile, result, &nst);
 940         break;
 941       }
 942
 943     case 'L':
 944       /* 'L' may introduce wide characters or strings.  */
 945       if (*buffer->cur == '\'' || *buffer->cur == '"')
 946         {
 947           lex_string (pfile, result, buffer->cur - 1);
 948           break;
 949         }
 950       /* Fall through.  */
 951
 952     case '_':
 953     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 954     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 955     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 956     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 957     case 'y': case 'z':
 958     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 959     case 'G': case 'H': case 'I': case 'J': case 'K':
 960     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 961     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 962     case 'Y': case 'Z':
 963       result->type = CPP_NAME;
 964       {
 965         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
 966         result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
 967                                            &nst);
 968         warn_about_normalization (pfile, result, &nst);
 969       }
 970
 971       /* Convert named operators to their proper types.  */
 972       if (result->val.node->flags & NODE_OPERATOR)
 973         {
 974           result->flags |= NAMED_OP;
 975           result->type = (enum cpp_ttype) result->val.node->directive_index;
 976         }
 977       break;
 978
 979     case '\'':
 980     case '"':
 981       lex_string (pfile, result, buffer->cur - 1);
 982       break;
 983
 984     case '/':
 985       /* A potential block or line comment.  */
 986       comment_start = buffer->cur;
 987       c = *buffer->cur;
 988
 989       if (c == '*')
 990         {
 991           if (_cpp_skip_block_comment (pfile))
 992             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
 993         }
 994       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 995                             || cpp_in_system_header (pfile)))
 996         {
 997           /* Warn about comments only if pedantically GNUC89, and not
 998              in system headers.  */
 999           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1000               && ! buffer->warned_cplusplus_comments)
1001             {
1002               cpp_error (pfile, CPP_DL_PEDWARN,
1003                          "C++ style comments are not allowed in ISO C90");
1004               cpp_error (pfile, CPP_DL_PEDWARN,
1005                          "(this will be reported only once per input file)");
1006               buffer->warned_cplusplus_comments = 1;
1007             }
1008
1009           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1010             cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1011         }
1012       else if (c == '=')
1013         {
1014           buffer->cur++;
1015           result->type = CPP_DIV_EQ;
1016           break;
1017         }
1018       else
1019         {
1020           result->type = CPP_DIV;
1021           break;
1022         }
1023
1024       if (!pfile->state.save_comments)
1025         {
1026           result->flags |= PREV_WHITE;
1027           goto update_tokens_line;
1028         }
1029
1030       /* Save the comment as a token in its own right.  */
1031       save_comment (pfile, result, comment_start, c);
1032       break;
1033
1034     case '<':
1035       if (pfile->state.angled_headers)
1036         {
1037           lex_string (pfile, result, buffer->cur - 1);
1038           break;
1039         }
1040
1041       result->type = CPP_LESS;
1042       if (*buffer->cur == '=')
1043         buffer->cur++, result->type = CPP_LESS_EQ;
1044       else if (*buffer->cur == '<')
1045         {
1046           buffer->cur++;
1047           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1048         }
1049       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1050         {
1051           buffer->cur++;
1052           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1053         }
1054       else if (CPP_OPTION (pfile, digraphs))
1055         {
1056           if (*buffer->cur == ':')
1057             {
1058               buffer->cur++;
1059               result->flags |= DIGRAPH;
1060               result->type = CPP_OPEN_SQUARE;
1061             }
1062           else if (*buffer->cur == '%')
1063             {
1064               buffer->cur++;
1065               result->flags |= DIGRAPH;
1066               result->type = CPP_OPEN_BRACE;
1067             }
1068         }
1069       break;
1070
1071     case '>':
1072       result->type = CPP_GREATER;
1073       if (*buffer->cur == '=')
1074         buffer->cur++, result->type = CPP_GREATER_EQ;
1075       else if (*buffer->cur == '>')
1076         {
1077           buffer->cur++;
1078           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1079         }
1080       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1081         {
1082           buffer->cur++;
1083           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1084         }
1085       break;
1086
1087     case '%':
1088       result->type = CPP_MOD;
1089       if (*buffer->cur == '=')
1090         buffer->cur++, result->type = CPP_MOD_EQ;
1091       else if (CPP_OPTION (pfile, digraphs))
1092         {
1093           if (*buffer->cur == ':')
1094             {
1095               buffer->cur++;
1096               result->flags |= DIGRAPH;
1097               result->type = CPP_HASH;
1098               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1099                 buffer->cur += 2, result->type = CPP_PASTE;
1100             }
1101           else if (*buffer->cur == '>')
1102             {
1103               buffer->cur++;
1104               result->flags |= DIGRAPH;
1105               result->type = CPP_CLOSE_BRACE;
1106             }
1107         }
1108       break;
1109
1110     case '.':
1111       result->type = CPP_DOT;
1112       if (ISDIGIT (*buffer->cur))
1113         {
1114           struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1115           result->type = CPP_NUMBER;
1116           lex_number (pfile, &result->val.str, &nst);
1117           warn_about_normalization (pfile, result, &nst);
1118         }
1119       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1120         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1121       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1122         buffer->cur++, result->type = CPP_DOT_STAR;
1123       break;
1124
1125     case '+':
1126       result->type = CPP_PLUS;
1127       if (*buffer->cur == '+')
1128         buffer->cur++, result->type = CPP_PLUS_PLUS;
1129       else if (*buffer->cur == '=')
1130         buffer->cur++, result->type = CPP_PLUS_EQ;
1131       break;
1132
1133     case '-':
1134       result->type = CPP_MINUS;
1135       if (*buffer->cur == '>')
1136         {
1137           buffer->cur++;
1138           result->type = CPP_DEREF;
1139           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1140             buffer->cur++, result->type = CPP_DEREF_STAR;
1141         }
1142       else if (*buffer->cur == '-')
1143         buffer->cur++, result->type = CPP_MINUS_MINUS;
1144       else if (*buffer->cur == '=')
1145         buffer->cur++, result->type = CPP_MINUS_EQ;
1146       break;
1147
1148     case '&':
1149       result->type = CPP_AND;
1150       if (*buffer->cur == '&')
1151         buffer->cur++, result->type = CPP_AND_AND;
1152       else if (*buffer->cur == '=')
1153         buffer->cur++, result->type = CPP_AND_EQ;
1154       break;
1155
1156     case '|':
1157       result->type = CPP_OR;
1158       if (*buffer->cur == '|')
1159         buffer->cur++, result->type = CPP_OR_OR;
1160       else if (*buffer->cur == '=')
1161         buffer->cur++, result->type = CPP_OR_EQ;
1162       break;
1163
1164     case ':':
1165       result->type = CPP_COLON;
1166       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1167         buffer->cur++, result->type = CPP_SCOPE;
1168       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1169         {
1170           buffer->cur++;
1171           result->flags |= DIGRAPH;
1172           result->type = CPP_CLOSE_SQUARE;
1173         }
1174       break;
1175
1176     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1177     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1178     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1179     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1180     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1181
1182     case '?': result->type = CPP_QUERY; break;
1183     case '~': result->type = CPP_COMPL; break;
1184     case ',': result->type = CPP_COMMA; break;
1185     case '(': result->type = CPP_OPEN_PAREN; break;
1186     case ')': result->type = CPP_CLOSE_PAREN; break;
1187     case '[': result->type = CPP_OPEN_SQUARE; break;
1188     case ']': result->type = CPP_CLOSE_SQUARE; break;
1189     case '{': result->type = CPP_OPEN_BRACE; break;
1190     case '}': result->type = CPP_CLOSE_BRACE; break;
1191     case ';': result->type = CPP_SEMICOLON; break;
1192
1193       /* @ is a punctuator in Objective-C.  */
1194     case '@': result->type = CPP_ATSIGN; break;
1195
1196     case '$':
1197     case '\\':
1198       {
1199         const uchar *base = --buffer->cur;
1200         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1201
1202         if (forms_identifier_p (pfile, true, &nst))
1203           {
1204             result->type = CPP_NAME;
1205             result->val.node = lex_identifier (pfile, base, true, &nst);
1206             warn_about_normalization (pfile, result, &nst);
1207             break;
1208           }
1209         buffer->cur++;
1210       }
1211
1212     default:
1213       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1214       break;
1215     }
1216
1217   return result;
1218 }
1219
1220 /* An upper bound on the number of bytes needed to spell TOKEN.
1221    Does not include preceding whitespace.  */
1222 unsigned int
1223 cpp_token_len (const cpp_token *token)
1224 {
1225   unsigned int len;
1226
1227   switch (TOKEN_SPELL (token))
1228     {
1229     default:            len = 4;                                break;
1230     case SPELL_LITERAL: len = token->val.str.len;               break;
1231     case SPELL_IDENT:   len = NODE_LEN (token->val.node) * 10;  break;
1232     }
1233
1234   return len;
1235 }
1236
/* Decode the single UTF-8 sequence starting at NAME and store it in
   BUFFER as a universal character name of the form \UXXXXXXXX with
   lower-case hex digits.  Exactly 10 bytes are written and no
   terminating NUL is added.  Returns how many bytes of NAME the
   sequence occupied.  Aborts if a trailing byte is not of the form
   10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int k;
  int shift;

  /* The count of leading 1 bits in the first byte is the length of
     the whole sequence.  */
  for (lead = *name; (lead & 0x80) != 0; lead <<= 1)
    seq_len++;

  /* The payload of the first byte is whatever follows the length
     prefix; each trailing byte then contributes six more bits.  */
  code_point = *name & (0x7F >> seq_len);
  for (k = 1; k < seq_len; k++)
    {
      unsigned char trail = name[k];

      code_point = (code_point << 6) | (trail & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((trail & 0xC0) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1270
1271
1272 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1273    already contain the enough space to hold the token's spelling.
1274    Returns a pointer to the character after the last character written.
1275    FORSTRING is true if this is to be the spelling after translation
1276    phase 1 (this is different for UCNs).
1277    FIXME: Would be nice if we didn't need the PFILE argument.  */
1278 unsigned char *
1279 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1280                  unsigned char *buffer, bool forstring)
1281 {
1282   switch (TOKEN_SPELL (token))
1283     {
1284     case SPELL_OPERATOR:
1285       {
1286         const unsigned char *spelling;
1287         unsigned char c;
1288
1289         if (token->flags & DIGRAPH)
1290           spelling
1291             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1292         else if (token->flags & NAMED_OP)
1293           goto spell_ident;
1294         else
1295           spelling = TOKEN_NAME (token);
1296
1297         while ((c = *spelling++) != '\0')
1298           *buffer++ = c;
1299       }
1300       break;
1301
1302     spell_ident:
1303     case SPELL_IDENT:
1304       if (forstring)
1305         {
1306           memcpy (buffer, NODE_NAME (token->val.node),
1307                   NODE_LEN (token->val.node));
1308           buffer += NODE_LEN (token->val.node);
1309         }
1310       else
1311         {
1312           size_t i;
1313           const unsigned char * name = NODE_NAME (token->val.node);
1314
1315           for (i = 0; i < NODE_LEN (token->val.node); i++)
1316             if (name[i] & ~0x7F)
1317               {
1318                 i += utf8_to_ucn (buffer, name + i) - 1;
1319                 buffer += 10;
1320               }
1321             else
1322               *buffer++ = NODE_NAME (token->val.node)[i];
1323         }
1324       break;
1325
1326     case SPELL_LITERAL:
1327       memcpy (buffer, token->val.str.text, token->val.str.len);
1328       buffer += token->val.str.len;
1329       break;
1330
1331     case SPELL_NONE:
1332       cpp_error (pfile, CPP_DL_ICE,
1333                  "unspellable token %s", TOKEN_NAME (token));
1334       break;
1335     }
1336
1337   return buffer;
1338 }
1339
1340 /* Returns TOKEN spelt as a null-terminated string.  The string is
1341    freed when the reader is destroyed.  Useful for diagnostics.  */
1342 unsigned char *
1343 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1344 {
1345   unsigned int len = cpp_token_len (token) + 1;
1346   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1347
1348   end = cpp_spell_token (pfile, token, start, false);
1349   end[0] = '\0';
1350
1351   return start;
1352 }
1353
1354 /* Used by C front ends, which really should move to using
1355    cpp_token_as_text.  */
1356 const char *
1357 cpp_type2name (enum cpp_ttype type)
1358 {
1359   return (const char *) token_spellings[type].name;
1360 }
1361
1362 /* Writes the spelling of token to FP, without any preceding space.
1363    Separated from cpp_spell_token for efficiency - to avoid stdio
1364    double-buffering.  */
1365 void
1366 cpp_output_token (const cpp_token *token, FILE *fp)
1367 {
1368   switch (TOKEN_SPELL (token))
1369     {
1370     case SPELL_OPERATOR:
1371       {
1372         const unsigned char *spelling;
1373         int c;
1374
1375         if (token->flags & DIGRAPH)
1376           spelling
1377             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1378         else if (token->flags & NAMED_OP)
1379           goto spell_ident;
1380         else
1381           spelling = TOKEN_NAME (token);
1382
1383         c = *spelling;
1384         do
1385           putc (c, fp);
1386         while ((c = *++spelling) != '\0');
1387       }
1388       break;
1389
1390     spell_ident:
1391     case SPELL_IDENT:
1392       {
1393         size_t i;
1394         const unsigned char * name = NODE_NAME (token->val.node);
1395
1396         for (i = 0; i < NODE_LEN (token->val.node); i++)
1397           if (name[i] & ~0x7F)
1398             {
1399               unsigned char buffer[10];
1400               i += utf8_to_ucn (buffer, name + i) - 1;
1401               fwrite (buffer, 1, 10, fp);
1402             }
1403           else
1404             fputc (NODE_NAME (token->val.node)[i], fp);
1405       }
1406       break;
1407
1408     case SPELL_LITERAL:
1409       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1410       break;
1411
1412     case SPELL_NONE:
1413       /* An error, most probably.  */
1414       break;
1415     }
1416 }
1417
1418 /* Compare two tokens.  */
1419 int
1420 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1421 {
1422   if (a->type == b->type && a->flags == b->flags)
1423     switch (TOKEN_SPELL (a))
1424       {
1425       default:                  /* Keep compiler happy.  */
1426       case SPELL_OPERATOR:
1427         return 1;
1428       case SPELL_NONE:
1429         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1430       case SPELL_IDENT:
1431         return a->val.node == b->val.node;
1432       case SPELL_LITERAL:
1433         return (a->val.str.len == b->val.str.len
1434                 && !memcmp (a->val.str.text, b->val.str.text,
1435                             a->val.str.len));
1436       }
1437
1438   return 0;
1439 }
1440
1441 /* Returns nonzero if a space should be inserted to avoid an
1442    accidental token paste for output.  For simplicity, it is
1443    conservative, and occasionally advises a space where one is not
1444    needed, e.g. "." and ".2".  */
1445 int
1446 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1447                  const cpp_token *token2)
1448 {
1449   enum cpp_ttype a = token1->type, b = token2->type;
1450   cppchar_t c;
1451
1452   if (token1->flags & NAMED_OP)
1453     a = CPP_NAME;
1454   if (token2->flags & NAMED_OP)
1455     b = CPP_NAME;
1456
1457   c = EOF;
1458   if (token2->flags & DIGRAPH)
1459     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1460   else if (token_spellings[b].category == SPELL_OPERATOR)
1461     c = token_spellings[b].name[0];
1462
1463   /* Quickly get everything that can paste with an '='.  */
1464   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1465     return 1;
1466
1467   switch (a)
1468     {
1469     case CPP_GREATER:   return c == '>' || c == '?';
1470     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1471     case CPP_PLUS:      return c == '+';
1472     case CPP_MINUS:     return c == '-' || c == '>';
1473     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1474     case CPP_MOD:       return c == ':' || c == '>';
1475     case CPP_AND:       return c == '&';
1476     case CPP_OR:        return c == '|';
1477     case CPP_COLON:     return c == ':' || c == '>';
1478     case CPP_DEREF:     return c == '*';
1479     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1480     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1481     case CPP_NAME:      return ((b == CPP_NUMBER
1482                                  && name_p (pfile, &token2->val.str))
1483                                 || b == CPP_NAME
1484                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1485     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1486                                 || c == '.' || c == '+' || c == '-');
1487                                       /* UCNs */
1488     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1489                                  && b == CPP_NAME)
1490                                 || (CPP_OPTION (pfile, objc)
1491                                     && token1->val.str.text[0] == '@'
1492                                     && (b == CPP_NAME || b == CPP_STRING)));
1493     default:            break;
1494     }
1495
1496   return 0;
1497 }
1498
1499 /* Output all the remaining tokens on the current line, and a newline
1500    character, to FP.  Leading whitespace is removed.  If there are
1501    macros, special token padding is not performed.  */
1502 void
1503 cpp_output_line (cpp_reader *pfile, FILE *fp)
1504 {
1505   const cpp_token *token;
1506
1507   token = cpp_get_token (pfile);
1508   while (token->type != CPP_EOF)
1509     {
1510       cpp_output_token (token, fp);
1511       token = cpp_get_token (pfile);
1512       if (token->flags & PREV_WHITE)
1513         putc (' ', fp);
1514     }
1515
1516   putc ('\n', fp);
1517 }
1518
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the room left in BUFF.
   Every macro argument is parenthesized so that an argument which is
   itself an expression keeps its intended grouping.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1533
1534 /* Create a new allocation buffer.  Place the control block at the end
1535    of the buffer, so that buffer overflows will cause immediate chaos.  */
1536 static _cpp_buff *
1537 new_buff (size_t len)
1538 {
1539   _cpp_buff *result;
1540   unsigned char *base;
1541
1542   if (len < MIN_BUFF_SIZE)
1543     len = MIN_BUFF_SIZE;
1544   len = CPP_ALIGN (len);
1545
1546   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1547   result = (_cpp_buff *) (base + len);
1548   result->base = base;
1549   result->cur = base;
1550   result->limit = base + len;
1551   result->next = NULL;
1552   return result;
1553 }
1554
1555 /* Place a chain of unwanted allocation buffers on the free list.  */
1556 void
1557 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1558 {
1559   _cpp_buff *end = buff;
1560
1561   while (end->next)
1562     end = end->next;
1563   end->next = pfile->free_buffs;
1564   pfile->free_buffs = buff;
1565 }
1566
1567 /* Return a free buffer of size at least MIN_SIZE.  */
1568 _cpp_buff *
1569 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1570 {
1571   _cpp_buff *result, **p;
1572
1573   for (p = &pfile->free_buffs;; p = &(*p)->next)
1574     {
1575       size_t size;
1576
1577       if (*p == NULL)
1578         return new_buff (min_size);
1579       result = *p;
1580       size = result->limit - result->base;
1581       /* Return a buffer that's big enough, but don't waste one that's
1582          way too big.  */
1583       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1584         break;
1585     }
1586
1587   *p = result->next;
1588   result->next = NULL;
1589   result->cur = result->base;
1590   return result;
1591 }
1592
1593 /* Creates a new buffer with enough space to hold the uncommitted
1594    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1595    the excess bytes to the new buffer.  Chains the new buffer after
1596    BUFF, and returns the new buffer.  */
1597 _cpp_buff *
1598 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1599 {
1600   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1601   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1602
1603   buff->next = new_buff;
1604   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1605   return new_buff;
1606 }
1607
1608 /* Creates a new buffer with enough space to hold the uncommitted
1609    remaining bytes of the buffer pointed to by BUFF, and at least
1610    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1611    Chains the new buffer before the buffer pointed to by BUFF, and
1612    updates the pointer to point to the new buffer.  */
1613 void
1614 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1615 {
1616   _cpp_buff *new_buff, *old_buff = *pbuff;
1617   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1618
1619   new_buff = _cpp_get_buff (pfile, size);
1620   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1621   new_buff->next = old_buff;
1622   *pbuff = new_buff;
1623 }
1624
1625 /* Free a chain of buffers starting at BUFF.  */
1626 void
1627 _cpp_free_buff (_cpp_buff *buff)
1628 {
1629   _cpp_buff *next;
1630
1631   for (; buff; buff = next)
1632     {
1633       next = buff->next;
1634       free (buff->base);
1635     }
1636 }
1637
1638 /* Allocate permanent, unaligned storage of length LEN.  */
1639 unsigned char *
1640 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1641 {
1642   _cpp_buff *buff = pfile->u_buff;
1643   unsigned char *result = buff->cur;
1644
1645   if (len > (size_t) (buff->limit - result))
1646     {
1647       buff = _cpp_get_buff (pfile, len);
1648       buff->next = pfile->u_buff;
1649       pfile->u_buff = buff;
1650       result = buff->cur;
1651     }
1652
1653   buff->cur = result + len;
1654   return result;
1655 }
1656
1657 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1658    That buffer is used for growing allocations when saving macro
1659    replacement lists in a #define, and when parsing an answer to an
1660    assertion in #assert, #unassert or #if (and therefore possibly
1661    whilst expanding macros).  It therefore must not be used by any
1662    code that they might call: specifically the lexer and the guts of
1663    the macro expander.
1664
1665    All existing other uses clearly fit this restriction: storing
1666    registered pragmas during initialization.  */
1667 unsigned char *
1668 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1669 {
1670   _cpp_buff *buff = pfile->a_buff;
1671   unsigned char *result = buff->cur;
1672
1673   if (len > (size_t) (buff->limit - result))
1674     {
1675       buff = _cpp_get_buff (pfile, len);
1676       buff->next = pfile->a_buff;
1677       pfile->a_buff = buff;
1678       result = buff->cur;
1679     }
1680
1681   buff->cur = result + len;
1682   return result;
1683 }
1684
1685 /* Say which field of TOK is in use.  */
1686
1687 enum cpp_token_fld_kind
1688 cpp_token_val_index (cpp_token *tok)
1689 {
1690   switch (TOKEN_SPELL (tok))
1691     {
1692     case SPELL_IDENT:
1693       return CPP_TOKEN_FLD_NODE;
1694     case SPELL_LITERAL:
1695       return CPP_TOKEN_FLD_STR;
1696     case SPELL_NONE:
1697       if (tok->type == CPP_MACRO_ARG)
1698         return CPP_TOKEN_FLD_ARG_NO;
1699       else if (tok->type == CPP_PADDING)
1700         return CPP_TOKEN_FLD_SOURCE;
1701       else if (tok->type == CPP_PRAGMA)
1702         return CPP_TOKEN_FLD_STR;
1703       /* else fall through */
1704     default:
1705       return CPP_TOKEN_FLD_NONE;
1706     }
1707 }