gas/app.c

   1 /* This is the Assembler Pre-Processor
   2    Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   3    1999, 2000
   4    Free Software Foundation, Inc.
   5
   6    This file is part of GAS, the GNU Assembler.
   7
   8    GAS is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 2, or (at your option)
  11    any later version.
  12
  13    GAS is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with GAS; see the file COPYING.  If not, write to the Free
  20    Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21    02111-1307, USA.  */
  22
  23 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
  24 /* App, the assembler pre-processor.  This pre-processor strips out excess
  25    spaces, turns single-quoted characters into a decimal constant, and turns
  26    # <number> <filename> <garbage> into a .line <number>\n.file <filename>
  27    pair.  This needs better error-handling.  */
  28
  29 #include <stdio.h>
  30 #include "as.h"                 /* For BAD_CASE() only */
  31
  32 #if (__STDC__ != 1)
  33 #ifndef const
  34 #define const  /* empty */
  35 #endif
  36 #endif
  37
  38 #ifdef TC_M68K
  39 /* Whether we are scrubbing in m68k MRI mode.  This is different from
  40    flag_m68k_mri, because the two flags will be affected by the .mri
  41    pseudo-op at different times.  */
  42 static int scrub_m68k_mri;
  43 #else
  44 #define scrub_m68k_mri 0
  45 #endif
  46
  47 /* The pseudo-op which switches in and out of MRI mode.  See the
  48    comment in do_scrub_chars.  */
  49 static const char mri_pseudo[] = ".mri 0";
  50
  51 #if defined TC_ARM && defined OBJ_ELF
  52 /* The pseudo-op for which we need to special-case `@' characters.
  53    See the comment in do_scrub_chars.  */
  54 static const char   symver_pseudo[] = ".symver";
  55 static const char * symver_state;
  56 #endif
  57
  58 static char lex[256];  /* Class of each byte value: 0 when the byte has
                               no special class, otherwise one of the
                               LEX_IS_* codes below.  do_scrub_begin fills
                               this table in.  */
     /* Characters that do_scrub_begin marks as LEX_IS_SYMBOL_COMPONENT
        before the target's comment and separator characters are applied.  */
  59 static const char symbol_chars[] =
  60 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  61
     /* Character classes stored in lex[].  The value 7 is not used.  */
  62 #define LEX_IS_SYMBOL_COMPONENT         1
  63 #define LEX_IS_WHITESPACE               2
  64 #define LEX_IS_LINE_SEPARATOR           3
  65 #define LEX_IS_COMMENT_START            4
  66 #define LEX_IS_LINE_COMMENT_START       5
     /* First character of a two-character comment opener; do_scrub_begin
        gives '/' this class when nothing else has claimed it.  */
  67 #define LEX_IS_TWOCHAR_COMMENT_1ST      6
  68 #define LEX_IS_STRINGQUOTE              8
  69 #define LEX_IS_COLON                    9
  70 #define LEX_IS_NEWLINE                  10
  71 #define LEX_IS_ONECHAR_QUOTE            11
     /* Target-specific classes: '-' on the V850 and '|' when
        DOUBLEBAR_PARALLEL is in effect (always the case for the M32R).  */
  72 #ifdef TC_V850
  73 #define LEX_IS_DOUBLEDASH_1ST           12
  74 #endif
  75 #ifdef TC_M32R
  76 #define DOUBLEBAR_PARALLEL
  77 #endif
  78 #ifdef DOUBLEBAR_PARALLEL
  79 #define LEX_IS_DOUBLEBAR_1ST            13
  80 #endif
     /* Class predicates.  C is used directly as an index into lex[], so it
        must lie in the range 0..255; in particular it must not be EOF.  */
  81 #define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
  82 #define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
  83 #define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
  84 #define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
  85 #define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
  86 #define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  87
  88 static int process_escape PARAMS ((int));
  89
  90 /* FIXME-soon: The entire lexer/parser thingy should be
  91    built statically at compile time rather than dynamically
  92    each and every time the assembler is run.  xoxorich.  */
  93
  94 void
  95 do_scrub_begin (m68k_mri)
  96      int m68k_mri ATTRIBUTE_UNUSED;
  97 {
  98   const char *p;
  99   int c;
 100
 101   lex[' '] = LEX_IS_WHITESPACE;
 102   lex['\t'] = LEX_IS_WHITESPACE;
 103   lex['\r'] = LEX_IS_WHITESPACE;
 104   lex['\n'] = LEX_IS_NEWLINE;
 105   lex[':'] = LEX_IS_COLON;
 106
 107 #ifdef TC_M68K
 108   scrub_m68k_mri = m68k_mri;
 109
 110   if (! m68k_mri)
 111 #endif
 112     {
 113       lex['"'] = LEX_IS_STRINGQUOTE;
 114
 115 #if ! defined (TC_HPPA) && ! defined (TC_I370)
 116       /* I370 uses single-quotes to delimit integer, float constants */
 117       lex['\''] = LEX_IS_ONECHAR_QUOTE;
 118 #endif
 119
 120 #ifdef SINGLE_QUOTE_STRINGS
 121       lex['\''] = LEX_IS_STRINGQUOTE;
 122 #endif
 123     }
 124
 125   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
 126      in state 5 of do_scrub_chars must be changed.  */
 127
 128   /* Note that these override the previous defaults, e.g. if ';' is a
 129      comment char, then it isn't a line separator.  */
 130   for (p = symbol_chars; *p; ++p)
 131     {
 132       lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
 133     }                           /* declare symbol characters */
 134
 135   for (c = 128; c < 256; ++c)
 136     lex[c] = LEX_IS_SYMBOL_COMPONENT;
 137
 138 #ifdef tc_symbol_chars
 139   /* This macro permits the processor to specify all characters which
 140      may appears in an operand.  This will prevent the scrubber from
 141      discarding meaningful whitespace in certain cases.  The i386
 142      backend uses this to support prefixes, which can confuse the
 143      scrubber as to whether it is parsing operands or opcodes.  */
 144   for (p = tc_symbol_chars; *p; ++p)
 145     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
 146 #endif
 147
 148   /* The m68k backend wants to be able to change comment_chars.  */
 149 #ifndef tc_comment_chars
 150 #define tc_comment_chars comment_chars
 151 #endif
 152   for (p = tc_comment_chars; *p; p++)
 153     {
 154       lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
 155     }                           /* declare comment chars */
 156
 157   for (p = line_comment_chars; *p; p++)
 158     {
 159       lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
 160     }                           /* declare line comment chars */
 161
 162   for (p = line_separator_chars; *p; p++)
 163     {
 164       lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
 165     }                           /* declare line separators */
 166
 167   /* Only allow slash-star comments if slash is not in use.
 168      FIXME: This isn't right.  We should always permit them.  */
 169   if (lex['/'] == 0)
 170     {
 171       lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 172     }
 173
 174 #ifdef TC_M68K
 175   if (m68k_mri)
 176     {
 177       lex['\''] = LEX_IS_STRINGQUOTE;
 178       lex[';'] = LEX_IS_COMMENT_START;
 179       lex['*'] = LEX_IS_LINE_COMMENT_START;
 180       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
 181          then it can't be used in an expression.  */
 182       lex['!'] = LEX_IS_LINE_COMMENT_START;
 183     }
 184 #endif
 185
 186 #ifdef TC_V850
 187   lex['-'] = LEX_IS_DOUBLEDASH_1ST;
 188 #endif
 189 #ifdef DOUBLEBAR_PARALLEL
 190   lex['|'] = LEX_IS_DOUBLEBAR_1ST;
 191 #endif
 192 #ifdef TC_D30V
 193   /* must do this is we want VLIW instruction with "->" or "<-" */
 194   lex['-'] = LEX_IS_SYMBOL_COMPONENT;
 195 #endif
 196 }                               /* do_scrub_begin() */
 197
 198 /* Saved state of the scrubber.  do_scrub_chars keeps its position in
        these file-scope variables so that it can return at any point and
        carry on from there the next time it is called; app_push and
        app_pop save and restore them around a nested input file.  */
     /* Current state of the do_scrub_chars state machine.  */
 199 static int state;
     /* State that do_scrub_chars moves to when one of the temporary
        states (-1, -2, 5) is finished.  */
 200 static int old_state;
     /* Next character to emit while in state -1.  */
 201 static char *out_string;
     /* NOTE(review): presumably scratch space that out_string can point
        into; its use is outside this part of the file -- confirm.  */
 202 static char out_buf[20];
     /* Incremented each time do_scrub_chars consumes a newline inside a
        slash-star comment or as an escaped line break in a string.  */
 203 static int add_newlines;
     /* When non-NULL, the unprocessed input (saved_input_len bytes) that
        do_scrub_chars resumes from instead of fetching more.  */
 204 static char *saved_input;
 205 static int saved_input_len;
     /* Buffer handed to the caller's GET function to be filled with raw
        input.  */
 206 static char input_buffer[32 * 1024];
     /* Position reached while matching mri_pseudo against the input, or
        NULL when no match is in progress.  */
 207 static const char *mri_state;
     /* Most recent character accepted while matching mri_pseudo; once the
        whole pseudo-op has matched it is compared against '1' to pick
        the MRI mode.  */
 208 static char mri_last_ch;
 209
 210 /* Data structure for saving the state of app across #include's.  Note that
 211    app is called asynchronously to the parsing of the .include's, so our
 212    state at the time .include is interpreted is completely unrelated.
 213    That's why we have to save it all.

        Each field holds the value of the file-scope variable of the same
        name.  app_push allocates and fills one of these; app_pop copies
        it back and frees it.  */
 214
 215 struct app_save {
 216   int          state;
 217   int          old_state;
 218   char *       out_string;
 219   char         out_buf[sizeof (out_buf)];
 220   int          add_newlines;
       /* Separately allocated copy of the unread input, or NULL when there
          was none.  app_pop frees it.  */
 221   char *       saved_input;
       /* Number of bytes at saved_input; not consulted when saved_input is
          NULL.  */
 222   int          saved_input_len;
 223 #ifdef TC_M68K
 224   int          scrub_m68k_mri;
 225 #endif
 226   const char * mri_state;
 227   char         mri_last_ch;
 228 #if defined TC_ARM && defined OBJ_ELF
 229   const char * symver_state;
 230 #endif
 231 };
 232
 233 char *
 234 app_push ()
 235 {
 236   register struct app_save *saved;
 237
 238   saved = (struct app_save *) xmalloc (sizeof (*saved));
 239   saved->state = state;
 240   saved->old_state = old_state;
 241   saved->out_string = out_string;
 242   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
 243   saved->add_newlines = add_newlines;
 244   if (saved_input == NULL)
 245     saved->saved_input = NULL;
 246   else
 247     {
 248       saved->saved_input = xmalloc (saved_input_len);
 249       memcpy (saved->saved_input, saved_input, saved_input_len);
 250       saved->saved_input_len = saved_input_len;
 251     }
 252 #ifdef TC_M68K
 253   saved->scrub_m68k_mri = scrub_m68k_mri;
 254 #endif
 255   saved->mri_state = mri_state;
 256   saved->mri_last_ch = mri_last_ch;
 257 #if defined TC_ARM && defined OBJ_ELF
 258   saved->symver_state = symver_state;
 259 #endif
 260
 261   /* do_scrub_begin() is not useful, just wastes time.  */
 262
 263   state = 0;
 264   saved_input = NULL;
 265
 266   return (char *) saved;
 267 }
 268
 269 void
 270 app_pop (arg)
 271      char *arg;
 272 {
 273   register struct app_save *saved = (struct app_save *) arg;
 274
 275   /* There is no do_scrub_end ().  */
 276   state = saved->state;
 277   old_state = saved->old_state;
 278   out_string = saved->out_string;
 279   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
 280   add_newlines = saved->add_newlines;
 281   if (saved->saved_input == NULL)
 282     saved_input = NULL;
 283   else
 284     {
 285       assert (saved->saved_input_len <= (int) (sizeof input_buffer));
 286       memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
 287       saved_input = input_buffer;
 288       saved_input_len = saved->saved_input_len;
 289       free (saved->saved_input);
 290     }
 291 #ifdef TC_M68K
 292   scrub_m68k_mri = saved->scrub_m68k_mri;
 293 #endif
 294   mri_state = saved->mri_state;
 295   mri_last_ch = saved->mri_last_ch;
 296 #if defined TC_ARM && defined OBJ_ELF
 297   symver_state = saved->symver_state;
 298 #endif
 299
 300   free (arg);
 301 }                               /* app_pop() */
 302
 303 /* @@ This assumes that \n &c are the same on host and target.  This is not
 304    necessarily true.  */
 305 static int
 306 process_escape (ch)
 307      int ch;
 308 {
 309   switch (ch)
 310     {
 311     case 'b':
 312       return '\b';
 313     case 'f':
 314       return '\f';
 315     case 'n':
 316       return '\n';
 317     case 'r':
 318       return '\r';
 319     case 't':
 320       return '\t';
 321     case '\'':
 322       return '\'';
 323     case '"':
 324       return '\"';
 325     default:
 326       return ch;
 327     }
 328 }
 329
 330 /* This function is called to process input characters.  The GET
 331    parameter is used to retrieve more input characters.  GET should
 332    set its parameter to point to a buffer, and return the length of
 333    the buffer; it should return 0 at end of file.  The scrubbed output
 334    characters are put into the buffer starting at TOSTART; the TOSTART
 335    buffer is TOLEN bytes in length.  The function returns the number
 336    of scrubbed characters put into TOSTART.  This will be TOLEN unless
 337    end of file was seen.  This function is arranged as a state
 338    machine, and saves its state so that it may return at any point.
 339    This is the way the old code used to work.  */
 340
 341 int
 342 do_scrub_chars (get, tostart, tolen)
 343      int (*get) PARAMS ((char *, int));
 344      char *tostart;
 345      int tolen;
 346 {
 347   char *to = tostart;
 348   char *toend = tostart + tolen;
 349   char *from;
 350   char *fromend;
 351   int fromlen;
 352   register int ch, ch2 = 0;
 353
 354   /*State 0: beginning of normal line
 355           1: After first whitespace on line (flush more white)
 356           2: After first non-white (opcode) on line (keep 1white)
 357           3: after second white on line (into operands) (flush white)
 358           4: after putting out a .line, put out digits
 359           5: parsing a string, then go to old-state
 360           6: putting out \ escape in a "d string.
 361           7: After putting out a .appfile, put out string.
 362           8: After putting out a .appfile string, flush until newline.
 363           9: After seeing symbol char in state 3 (keep 1white after symchar)
 364          10: After seeing whitespace in state 9 (keep white before symchar)
 365          11: After seeing a symbol character in state 0 (eg a label definition)
 366          -1: output string in out_string and go to the state in old_state
 367          -2: flush text until a '*' '/' is seen, then go to state old_state
 368 #ifdef TC_V850
 369          12: After seeing a dash, looking for a second dash as a start of comment.
 370 #endif
 371 #ifdef DOUBLEBAR_PARALLEL
 372          13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression separator.
 373 #endif
 374           */
 375
 376   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
 377      constructs like ``.loc 1 20''.  This was turning into ``.loc
 378      120''.  States 9 and 10 ensure that a space is never dropped in
 379      between characters which could appear in an identifier.  Ian
 380      Taylor, ian@cygnus.com.
 381
 382      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
 383      correctly on the PA (and any other target where colons are optional).
 384      Jeff Law, law@cs.utah.edu.
 385
 386      I added state 13 so that something like "cmp r1, r2 || trap #1" does not
 387      get squashed into "cmp r1,r2||trap#1", with the all important space
 388      between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
 389
 390   /* This macro gets the next input character.  */
 391
 392 #define GET()                                                   \
 393   (from < fromend                                               \
 394    ? * (unsigned char *) (from++)                               \
 395    : (saved_input = NULL,                                       \
 396       fromlen = (*get) (input_buffer, sizeof input_buffer),     \
 397       from = input_buffer,                                      \
 398       fromend = from + fromlen,                                 \
 399       (fromlen == 0                                             \
 400        ? EOF                                                    \
 401        : * (unsigned char *) (from++))))
 402
 403   /* This macro pushes a character back on the input stream.  */
 404
 405 #define UNGET(uch) (*--from = (uch))
 406
 407   /* This macro puts a character into the output buffer.  If this
 408      character fills the output buffer, this macro jumps to the label
 409      TOFULL.  We use this rather ugly approach because we need to
 410      handle two different termination conditions: EOF on the input
 411      stream, and a full output buffer.  It would be simpler if we
 412      always read in the entire input stream before processing it, but
 413      I don't want to make such a significant change to the assembler's
 414      memory usage.  */
 415
 416 #define PUT(pch)                        \
 417   do                                    \
 418     {                                   \
 419       *to++ = (pch);                    \
 420       if (to >= toend)                  \
 421         goto tofull;                    \
 422     }                                   \
 423   while (0)
 424
 425   if (saved_input != NULL)
 426     {
 427       from = saved_input;
 428       fromend = from + saved_input_len;
 429     }
 430   else
 431     {
 432       fromlen = (*get) (input_buffer, sizeof input_buffer);
 433       if (fromlen == 0)
 434         return 0;
 435       from = input_buffer;
 436       fromend = from + fromlen;
 437     }
 438
 439   while (1)
 440     {
 441       /* The cases in this switch end with continue, in order to
 442          branch back to the top of this while loop and generate the
 443          next output character in the appropriate state.  */
 444       switch (state)
 445         {
 446         case -1:
 447           ch = *out_string++;
 448           if (*out_string == '\0')
 449             {
 450               state = old_state;
 451               old_state = 3;
 452             }
 453           PUT (ch);
 454           continue;
 455
 456         case -2:
 457           for (;;)
 458             {
 459               do
 460                 {
 461                   ch = GET ();
 462
 463                   if (ch == EOF)
 464                     {
 465                       as_warn (_("end of file in comment"));
 466                       goto fromeof;
 467                     }
 468
 469                   if (ch == '\n')
 470                     PUT ('\n');
 471                 }
 472               while (ch != '*');
 473
 474               while ((ch = GET ()) == '*')
 475                 ;
 476
 477               if (ch == EOF)
 478                 {
 479                   as_warn (_("end of file in comment"));
 480                   goto fromeof;
 481                 }
 482
 483               if (ch == '/')
 484                 break;
 485
 486               UNGET (ch);
 487             }
 488
 489           state = old_state;
 490           UNGET (' ');
 491           continue;
 492
 493         case 4:
 494           ch = GET ();
 495           if (ch == EOF)
 496             goto fromeof;
 497           else if (ch >= '0' && ch <= '9')
 498             PUT (ch);
 499           else
 500             {
 501               while (ch != EOF && IS_WHITESPACE (ch))
 502                 ch = GET ();
 503               if (ch == '"')
 504                 {
 505                   UNGET (ch);
 506                   if (scrub_m68k_mri)
 507                     out_string = "\n\tappfile ";
 508                   else
 509                     out_string = "\n\t.appfile ";
 510                   old_state = 7;
 511                   state = -1;
 512                   PUT (*out_string++);
 513                 }
 514               else
 515                 {
 516                   while (ch != EOF && ch != '\n')
 517                     ch = GET ();
 518                   state = 0;
 519                   PUT (ch);
 520                 }
 521             }
 522           continue;
 523
 524         case 5:
 525           /* We are going to copy everything up to a quote character,
 526              with special handling for a backslash.  We try to
 527              optimize the copying in the simple case without using the
 528              GET and PUT macros.  */
 529           {
 530             char *s;
 531             int len;
 532
 533             for (s = from; s < fromend; s++)
 534               {
 535                 ch = *s;
 536                 /* This condition must be changed if the type of any
 537                    other character can be LEX_IS_STRINGQUOTE.  */
 538                 if (ch == '\\'
 539                     || ch == '"'
 540                     || ch == '\''
 541                     || ch == '\n')
 542                   break;
 543               }
 544             len = s - from;
 545             if (len > toend - to)
 546               len = toend - to;
 547             if (len > 0)
 548               {
 549                 memcpy (to, from, len);
 550                 to += len;
 551                 from += len;
 552               }
 553           }
 554
 555           ch = GET ();
 556           if (ch == EOF)
 557             {
 558               as_warn (_("end of file in string: inserted '\"'"));
 559               state = old_state;
 560               UNGET ('\n');
 561               PUT ('"');
 562             }
 563           else if (lex[ch] == LEX_IS_STRINGQUOTE)
 564             {
 565               state = old_state;
 566               PUT (ch);
 567             }
 568 #ifndef NO_STRING_ESCAPES
 569           else if (ch == '\\')
 570             {
 571               state = 6;
 572               PUT (ch);
 573             }
 574 #endif
 575           else if (scrub_m68k_mri && ch == '\n')
 576             {
 577               /* Just quietly terminate the string.  This permits lines like
 578                    bne  label   loop if we haven't reach end yet
 579                  */
 580               state = old_state;
 581               UNGET (ch);
 582               PUT ('\'');
 583             }
 584           else
 585             {
 586               PUT (ch);
 587             }
 588           continue;
 589
 590         case 6:
 591           state = 5;
 592           ch = GET ();
 593           switch (ch)
 594             {
 595               /* Handle strings broken across lines, by turning '\n' into
 596                  '\\' and 'n'.  */
 597             case '\n':
 598               UNGET ('n');
 599               add_newlines++;
 600               PUT ('\\');
 601               continue;
 602
 603             case '"':
 604             case '\\':
 605             case 'b':
 606             case 'f':
 607             case 'n':
 608             case 'r':
 609             case 't':
 610             case 'v':
 611             case 'x':
 612             case 'X':
 613             case '0':
 614             case '1':
 615             case '2':
 616             case '3':
 617             case '4':
 618             case '5':
 619             case '6':
 620             case '7':
 621               break;
 622 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 623             default:
 624               as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
 625               break;
 626 #else  /* ONLY_STANDARD_ESCAPES */
 627             default:
 628               /* Accept \x as x for any x */
 629               break;
 630 #endif /* ONLY_STANDARD_ESCAPES */
 631
 632             case EOF:
 633               as_warn (_("End of file in string: '\"' inserted"));
 634               PUT ('"');
 635               continue;
 636             }
 637           PUT (ch);
 638           continue;
 639
 640         case 7:
 641           ch = GET ();
 642           state = 5;
 643           old_state = 8;
 644           if (ch == EOF)
 645             goto fromeof;
 646           PUT (ch);
 647           continue;
 648
 649         case 8:
 650           do
 651             ch = GET ();
 652           while (ch != '\n' && ch != EOF);
 653           if (ch == EOF)
 654             goto fromeof;
 655           state = 0;
 656           PUT (ch);
 657           continue;
 658         }
 659
 660       /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
 661
 662       /* flushchar: */
 663       ch = GET ();
 664
 665     recycle:
 666
 667 #if defined TC_ARM && defined OBJ_ELF
 668       /* We need to watch out for .symver directives.  See the comment later
 669          in this function.  */
 670       if (symver_state == NULL)
 671         {
 672           if ((state == 0 || state == 1) && ch == symver_pseudo[0])
 673             symver_state = symver_pseudo + 1;
 674         }
 675       else
 676         {
 677           /* We advance to the next state if we find the right
 678              character.  */
 679           if (ch != '\0' && (*symver_state == ch))
 680             ++symver_state;
 681           else if (*symver_state != '\0')
 682             /* We did not get the expected character, or we didn't
 683                get a valid terminating character after seeing the
 684                entire pseudo-op, so we must go back to the beginning.  */
 685             symver_state = NULL;
 686           else
 687             {
 688               /* We've read the entire pseudo-op.  If this is the end
 689                  of the line, go back to the beginning.  */
 690               if (IS_NEWLINE (ch))
 691                 symver_state = NULL;
 692             }
 693         }
 694 #endif /* TC_ARM && OBJ_ELF */
 695
 696 #ifdef TC_M68K
 697       /* We want to have pseudo-ops which control whether we are in
 698          MRI mode or not.  Unfortunately, since m68k MRI mode affects
 699          the scrubber, that means that we need a special purpose
 700          recognizer here.  */
 701       if (mri_state == NULL)
 702         {
 703           if ((state == 0 || state == 1)
 704               && ch == mri_pseudo[0])
 705             mri_state = mri_pseudo + 1;
 706         }
 707       else
 708         {
 709           /* We advance to the next state if we find the right
 710              character, or if we need a space character and we get any
 711              whitespace character, or if we need a '0' and we get a
 712              '1' (this is so that we only need one state to handle
 713              ``.mri 0'' and ``.mri 1'').  */
 714           if (ch != '\0'
 715               && (*mri_state == ch
 716                   || (*mri_state == ' '
 717                       && lex[ch] == LEX_IS_WHITESPACE)
 718                   || (*mri_state == '0'
 719                       && ch == '1')))
 720             {
 721               mri_last_ch = ch;
 722               ++mri_state;
 723             }
 724           else if (*mri_state != '\0'
 725                    || (lex[ch] != LEX_IS_WHITESPACE
 726                        && lex[ch] != LEX_IS_NEWLINE))
 727             {
 728               /* We did not get the expected character, or we didn't
 729                  get a valid terminating character after seeing the
 730                  entire pseudo-op, so we must go back to the
 731                  beginning.  */
 732               mri_state = NULL;
 733             }
 734           else
 735             {
 736               /* We've read the entire pseudo-op.  mri_last_ch is
 737                  either '0' or '1' indicating whether to enter or
 738                  leave MRI mode.  */
 739               do_scrub_begin (mri_last_ch == '1');
 740               mri_state = NULL;
 741
 742               /* We continue handling the character as usual.  The
 743                  main gas reader must also handle the .mri pseudo-op
 744                  to control expression parsing and the like.  */
 745             }
 746         }
 747 #endif
 748
 749       if (ch == EOF)
 750         {
 751           if (state != 0)
 752             {
 753               as_warn (_("end of file not at end of a line; newline inserted"));
 754               state = 0;
 755               PUT ('\n');
 756             }
 757           goto fromeof;
 758         }
 759
 760       switch (lex[ch])
 761         {
 762         case LEX_IS_WHITESPACE:
 763           do
 764             {
 765               ch = GET ();
 766             }
 767           while (ch != EOF && IS_WHITESPACE (ch));
 768           if (ch == EOF)
 769             goto fromeof;
 770
 771           if (state == 0)
 772             {
 773               /* Preserve a single whitespace character at the
 774                  beginning of a line.  */
 775               state = 1;
 776               UNGET (ch);
 777               PUT (' ');
 778               break;
 779             }
 780
 781 #ifdef KEEP_WHITE_AROUND_COLON
 782           if (lex[ch] == LEX_IS_COLON)
 783             {
 784               /* Only keep this white if there's no white *after* the
 785                  colon.  */
 786               ch2 = GET ();
 787               UNGET (ch2);
 788               if (!IS_WHITESPACE (ch2))
 789                 {
 790                   state = 9;
 791                   UNGET (ch);
 792                   PUT (' ');
 793                   break;
 794                 }
 795             }
 796 #endif
 797           if (IS_COMMENT (ch)
 798               || ch == '/'
 799               || IS_LINE_SEPARATOR (ch))
 800             {
 801               if (scrub_m68k_mri)
 802                 {
 803                   /* In MRI mode, we keep these spaces.  */
 804                   UNGET (ch);
 805                   PUT (' ');
 806                   break;
 807                 }
 808               goto recycle;
 809             }
 810
 811           /* If we're in state 2 or 11, we've seen a non-white
 812              character followed by whitespace.  If the next character
 813              is ':', this is whitespace after a label name which we
 814              normally must ignore.  In MRI mode, though, spaces are
 815              not permitted between the label and the colon.  */
 816           if ((state == 2 || state == 11)
 817               && lex[ch] == LEX_IS_COLON
 818               && ! scrub_m68k_mri)
 819             {
 820               state = 1;
 821               PUT (ch);
 822               break;
 823             }
 824
 825           switch (state)
 826             {
 827             case 0:
 828               state++;
 829               goto recycle;     /* Punted leading sp */
 830             case 1:
 831               /* We can arrive here if we leave a leading whitespace
 832                  character at the beginning of a line.  */
 833               goto recycle;
 834             case 2:
 835               state = 3;
 836               if (to + 1 < toend)
 837                 {
 838                   /* Optimize common case by skipping UNGET/GET.  */
 839                   PUT (' ');    /* Sp after opco */
 840                   goto recycle;
 841                 }
 842               UNGET (ch);
 843               PUT (' ');
 844               break;
 845             case 3:
 846               if (scrub_m68k_mri)
 847                 {
 848                   /* In MRI mode, we keep these spaces.  */
 849                   UNGET (ch);
 850                   PUT (' ');
 851                   break;
 852                 }
 853               goto recycle;     /* Sp in operands */
 854             case 9:
 855             case 10:
 856               if (scrub_m68k_mri)
 857                 {
 858                   /* In MRI mode, we keep these spaces.  */
 859                   state = 3;
 860                   UNGET (ch);
 861                   PUT (' ');
 862                   break;
 863                 }
 864               state = 10;       /* Sp after symbol char */
 865               goto recycle;
 866             case 11:
 867               if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
 868                 state = 1;
 869               else
 870                 {
 871                   /* We know that ch is not ':', since we tested that
 872                      case above.  Therefore this is not a label, so it
 873                      must be the opcode, and we've just seen the
 874                      whitespace after it.  */
 875                   state = 3;
 876                 }
 877               UNGET (ch);
 878               PUT (' ');        /* Sp after label definition.  */
 879               break;
 880             default:
 881               BAD_CASE (state);
 882             }
 883           break;
 884
 885         case LEX_IS_TWOCHAR_COMMENT_1ST:
 886           ch2 = GET ();
 887           if (ch2 == '*')
 888             {
 889               for (;;)
 890                 {
 891                   do
 892                     {
 893                       ch2 = GET ();
 894                       if (ch2 != EOF && IS_NEWLINE (ch2))
 895                         add_newlines++;
 896                     }
 897                   while (ch2 != EOF && ch2 != '*');
 898
 899                   while (ch2 == '*')
 900                     ch2 = GET ();
 901
 902                   if (ch2 == EOF || ch2 == '/')
 903                     break;
 904
 905                   /* This UNGET will ensure that we count newlines
 906                      correctly.  */
 907                   UNGET (ch2);
 908                 }
 909
 910               if (ch2 == EOF)
 911                 as_warn (_("end of file in multiline comment"));
 912
 913               ch = ' ';
 914               goto recycle;
 915             }
 916 #ifdef DOUBLESLASH_LINE_COMMENTS
 917           else if (ch2 == '/')
 918             {
 919               do
 920                 {
 921                   ch = GET ();
 922                 }
 923               while (ch != EOF && !IS_NEWLINE (ch));
 924               if (ch == EOF)
 925                 as_warn ("end of file in comment; newline inserted");
 926               state = 0;
 927               PUT ('\n');
 928               break;
 929             }
 930 #endif
 931           else
 932             {
 933               if (ch2 != EOF)
 934                 UNGET (ch2);
 935               if (state == 9 || state == 10)
 936                 state = 3;
 937               PUT (ch);
 938             }
 939           break;
 940
 941         case LEX_IS_STRINGQUOTE:
 942           if (state == 10)
 943             {
 944               /* Preserve the whitespace in foo "bar" */
 945               UNGET (ch);
 946               state = 3;
 947               PUT (' ');
 948
 949               /* PUT didn't jump out.  We could just break, but we
 950                  know what will happen, so optimize a bit.  */
 951               ch = GET ();
 952               old_state = 3;
 953             }
 954           else if (state == 9)
 955             old_state = 3;
 956           else
 957             old_state = state;
 958           state = 5;
 959           PUT (ch);
 960           break;
 961
 962 #ifndef IEEE_STYLE
 963         case LEX_IS_ONECHAR_QUOTE:
 964           if (state == 10)
 965             {
 966               /* Preserve the whitespace in foo 'b' */
 967               UNGET (ch);
 968               state = 3;
 969               PUT (' ');
 970               break;
 971             }
 972           ch = GET ();
 973           if (ch == EOF)
 974             {
 975               as_warn (_("end of file after a one-character quote; \\0 inserted"));
 976               ch = 0;
 977             }
 978           if (ch == '\\')
 979             {
 980               ch = GET ();
 981               if (ch == EOF)
 982                 {
 983                   as_warn (_("end of file in escape character"));
 984                   ch = '\\';
 985                 }
 986               else
 987                 ch = process_escape (ch);
 988             }
 989           sprintf (out_buf, "%d", (int) (unsigned char) ch);
 990
 991           /* None of these 'x constants for us.  We want 'x'.  */
 992           if ((ch = GET ()) != '\'')
 993             {
 994 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 995               as_warn (_("Missing close quote: (assumed)"));
 996 #else
 997               if (ch != EOF)
 998                 UNGET (ch);
 999 #endif
1000             }
1001           if (strlen (out_buf) == 1)
1002             {
1003               PUT (out_buf[0]);
1004               break;
1005             }
1006           if (state == 9)
1007             old_state = 3;
1008           else
1009             old_state = state;
1010           state = -1;
1011           out_string = out_buf;
1012           PUT (*out_string++);
1013           break;
1014 #endif
1015
1016         case LEX_IS_COLON:
1017 #ifdef KEEP_WHITE_AROUND_COLON
1018           state = 9;
1019 #else
1020           if (state == 9 || state == 10)
1021             state = 3;
1022           else if (state != 3)
1023             state = 1;
1024 #endif
1025           PUT (ch);
1026           break;
1027
1028         case LEX_IS_NEWLINE:
1029           /* Roll out a bunch of newlines from inside comments, etc.  */
1030           if (add_newlines)
1031             {
1032               --add_newlines;
1033               UNGET (ch);
1034             }
1035           /* Fall through.  */
1036
1037         case LEX_IS_LINE_SEPARATOR:
1038           state = 0;
1039           PUT (ch);
1040           break;
1041
1042 #ifdef TC_V850
1043         case LEX_IS_DOUBLEDASH_1ST:
1044           ch2 = GET ();
1045           if (ch2 != '-')
1046             {
1047               UNGET (ch2);
1048               goto de_fault;
1049             }
1050           /* Read and skip to end of line.  */
1051           do
1052             {
1053               ch = GET ();
1054             }
1055           while (ch != EOF && ch != '\n');
1056           if (ch == EOF)
1057             {
1058               as_warn (_("end of file in comment; newline inserted"));
1059             }
1060           state = 0;
1061           PUT ('\n');
1062           break;
1063 #endif
1064 #ifdef DOUBLEBAR_PARALLEL
1065         case LEX_IS_DOUBLEBAR_1ST:
1066           ch2 = GET ();
1067           if (ch2 != '|')
1068             {
1069               UNGET (ch2);
1070               goto de_fault;
1071             }
1072           /* Reset back to state 1 and pretend that we are parsing a line from
1073              just after the first white space.  */
1074           state = 1;
1075           PUT ('|');
1076           PUT ('|');
1077           break;
1078 #endif
1079         case LEX_IS_LINE_COMMENT_START:
1080           /* FIXME-someday: The two character comment stuff was badly
1081              thought out.  On i386, we want '/' as line comment start
1082              AND we want C style comments.  hence this hack.  The
1083              whole lexical process should be reworked.  xoxorich.  */
1084           if (ch == '/')
1085             {
1086               ch2 = GET ();
1087               if (ch2 == '*')
1088                 {
1089                   old_state = 3;
1090                   state = -2;
1091                   break;
1092                 }
1093               else
1094                 {
1095                   UNGET (ch2);
1096                 }
1097             } /* bad hack */
1098
1099           if (state == 0 || state == 1) /* Only comment at start of line.  */
1100             {
1101               int startch;
1102
1103               startch = ch;
1104
1105               do
1106                 {
1107                   ch = GET ();
1108                 }
1109               while (ch != EOF && IS_WHITESPACE (ch));
1110               if (ch == EOF)
1111                 {
1112                   as_warn (_("end of file in comment; newline inserted"));
1113                   PUT ('\n');
1114                   break;
1115                 }
1116               if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1117                 {
1118                   /* Not a cpp line.  */
1119                   while (ch != EOF && !IS_NEWLINE (ch))
1120                     ch = GET ();
1121                   if (ch == EOF)
1122                     as_warn (_("EOF in Comment: Newline inserted"));
1123                   state = 0;
1124                   PUT ('\n');
1125                   break;
1126                 }
1127               /* Looks like `# 123 "filename"' from cpp.  */
1128               UNGET (ch);
1129               old_state = 4;
1130               state = -1;
1131               if (scrub_m68k_mri)
1132                 out_string = "\tappline ";
1133               else
1134                 out_string = "\t.appline ";
1135               PUT (*out_string++);
1136               break;
1137             }
1138
1139 #ifdef TC_D10V
1140           /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1141              Trap is the only short insn that has a first operand that is
1142              neither register nor label.
1143              We must prevent exef0f ||trap #1 from degenerating to exef0f
1144              ||trap#1.  We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1145              already LEX_IS_LINE_COMMENT_START.  However, it is the
1146              only character in line_comment_chars for d10v, hence we
1147              can recognize it as such.  */
1148           /* An alternative approach would be to reset the state to 1 when
1149              we see '||', '<-' or '->', but that seems to be overkill.  */
1150           if (state == 10)
1151             PUT (' ');
1152 #endif
1153           /* We have a line comment character which is not at the
1154              start of a line.  If this is also a normal comment
1155              character, fall through.  Otherwise treat it as a default
1156              character.  */
1157           if (strchr (tc_comment_chars, ch) == NULL
1158               && (! scrub_m68k_mri
1159                   || (ch != '!' && ch != '*')))
1160             goto de_fault;
1161           if (scrub_m68k_mri
1162               && (ch == '!' || ch == '*' || ch == '#')
1163               && state != 1
1164               && state != 10)
1165             goto de_fault;
1166           /* Fall through.  */
1167         case LEX_IS_COMMENT_START:
1168 #if defined TC_ARM && defined OBJ_ELF
1169           /* On the ARM, `@' is the comment character.
1170              Unfortunately this is also a special character in ELF .symver
1171              directives (and .type, though we deal with those another way).
1172              So we check if this line is such a directive, and treat
1173              the character as default if so.  This is a hack.  */
1174           if ((symver_state != NULL) && (*symver_state == 0))
1175             goto de_fault;
1176 #endif
1177 #ifdef WARN_COMMENTS
1178           if (!found_comment)
1179             as_where (&found_comment_file, &found_comment);
1180 #endif
1181           do
1182             {
1183               ch = GET ();
1184             }
1185           while (ch != EOF && !IS_NEWLINE (ch));
1186           if (ch == EOF)
1187             as_warn (_("end of file in comment; newline inserted"));
1188           state = 0;
1189           PUT ('\n');
1190           break;
1191
1192         case LEX_IS_SYMBOL_COMPONENT:
1193           if (state == 10)
1194             {
1195               /* This is a symbol character following another symbol
1196                  character, with whitespace in between.  We skipped
1197                  the whitespace earlier, so output it now.  */
1198               UNGET (ch);
1199               state = 3;
1200               PUT (' ');
1201               break;
1202             }
1203
1204           if (state == 3)
1205             state = 9;
1206
1207           /* This is a common case.  Quickly copy CH and all the
1208              following symbol component or normal characters.  */
1209           if (to + 1 < toend
1210               && mri_state == NULL
1211 #if defined TC_ARM && defined OBJ_ELF
1212               && symver_state == NULL
1213 #endif
1214               )
1215             {
1216               char *s;
1217               int len;
1218
1219               for (s = from; s < fromend; s++)
1220                 {
1221                   int type;
1222
1223                   ch2 = *(unsigned char *) s;
1224                   type = lex[ch2];
1225                   if (type != 0
1226                       && type != LEX_IS_SYMBOL_COMPONENT)
1227                     break;
1228                 }
1229               if (s > from)
1230                 {
1231                   /* Handle the last character normally, for
1232                      simplicity.  */
1233                   --s;
1234                 }
1235               len = s - from;
1236               if (len > (toend - to) - 1)
1237                 len = (toend - to) - 1;
1238               if (len > 0)
1239                 {
1240                   PUT (ch);
1241                   if (len > 8)
1242                     {
1243                       memcpy (to, from, len);
1244                       to += len;
1245                       from += len;
1246                     }
1247                   else
1248                     {
1249                       switch (len)
1250                         {
1251                         case 8: *to++ = *from++;
1252                         case 7: *to++ = *from++;
1253                         case 6: *to++ = *from++;
1254                         case 5: *to++ = *from++;
1255                         case 4: *to++ = *from++;
1256                         case 3: *to++ = *from++;
1257                         case 2: *to++ = *from++;
1258                         case 1: *to++ = *from++;
1259                         }
1260                     }
1261                   ch = GET ();
1262                 }
1263             }
1264
1265           /* Fall through.  */
1266         default:
1267         de_fault:
1268           /* Some relatively `normal' character.  */
1269           if (state == 0)
1270             {
1271               state = 11;       /* Now seeing label definition */
1272             }
1273           else if (state == 1)
1274             {
1275               state = 2;        /* Ditto */
1276             }
1277           else if (state == 9)
1278             {
1279               if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
1280                 state = 3;
1281             }
1282           else if (state == 10)
1283             {
1284               if (ch == '\\')
1285                 {
1286                   /* Special handling for backslash: a backslash may
1287                      be the beginning of a formal parameter (of a
1288                      macro) following another symbol character, with
1289                      whitespace in between.  If that is the case, we
1290                      output a space before the parameter.  Strictly
1291                      speaking, correct handling depends upon what the
1292                      macro parameter expands into; if the parameter
1293                      expands into something which does not start with
1294                      an operand character, then we don't want to keep
1295                      the space.  We don't have enough information to
1296                      make the right choice, so here we are making the
1297                      choice which is more likely to be correct.  */
1298                   PUT (' ');
1299                 }
1300
1301               state = 3;
1302             }
1303           PUT (ch);
1304           break;
1305         }
1306     }
1307
1308   /*NOTREACHED*/
1309
1310  fromeof:
1311   /* We have reached the end of the input.  */
1312   return to - tostart;
1313
1314  tofull:
1315   /* The output buffer is full.  Save any input we have not yet
1316      processed.  */
1317   if (fromend > from)
1318     {
1319       saved_input = from;
1320       saved_input_len = fromend - from;
1321     }
1322   else
1323     saved_input = NULL;
1324
1325   return to - tostart;
1326 }
1327
1328 /* end of app.c */