gas/app.c

   1 /* This is the Assembler Pre-Processor
   2    Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 97, 98, 1999
   3    Free Software Foundation, Inc.
   4
   5    This file is part of GAS, the GNU Assembler.
   6
   7    GAS is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    GAS is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GAS; see the file COPYING.  If not, write to the Free
  19    Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20    02111-1307, USA.  */
  21
  22 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
  23 /* App, the assembler pre-processor.  This pre-processor strips out excess
  24    spaces, turns single-quoted characters into a decimal constant, and turns
  25    # <number> <filename> <garbage> into a .line <number>\n.file <filename>
  26    pair.  This needs better error-handling.  */
  27
  28 #include <stdio.h>
  29 #include "as.h"                 /* For BAD_CASE() only */
  30
  31 #if (__STDC__ != 1)         /* Not ISO C: make const expand to nothing.  */
  32 #ifndef const
  33 #define const  /* empty */
  34 #endif
  35 #endif
  36
  37 /* Whether we are scrubbing in m68k MRI mode.  This is different from
  38    flag_m68k_mri, because the two flags will be affected by the .mri
  39    pseudo-op at different times.  */
  40 static int scrub_m68k_mri;
  41
  42 /* The pseudo-op which switches in and out of MRI mode.  See the
  43    comment in do_scrub_chars.  The recognizer there also accepts a '1' where this string has its '0'.  */
  44 static const char mri_pseudo[] = ".mri 0";
  45
  46 #if defined TC_ARM && defined OBJ_ELF
  47 /* The pseudo-op for which we need to special-case `@' characters.
  48    See the comment in do_scrub_chars.  */
  49 static const char   symver_pseudo[] = ".symver";
  50 static const char * symver_state;       /* Next character of symver_pseudo to match, or NULL when no match is in progress.  */
  51 #endif
  52
  53 static char lex[256];       /* LEX_IS_* class of each character value; filled in by do_scrub_begin.  */
  54 static const char symbol_chars[] =      /* Characters that do_scrub_begin marks LEX_IS_SYMBOL_COMPONENT.  */
  55 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  56
  57 #define LEX_IS_SYMBOL_COMPONENT         1       /* symbol_chars, bytes 128..255, tc_symbol_chars */
  58 #define LEX_IS_WHITESPACE               2       /* space, tab, carriage return */
  59 #define LEX_IS_LINE_SEPARATOR           3       /* ';' by default, plus line_separator_chars */
  60 #define LEX_IS_COMMENT_START            4       /* characters in tc_comment_chars */
  61 #define LEX_IS_LINE_COMMENT_START       5       /* characters in line_comment_chars */
  62 #define LEX_IS_TWOCHAR_COMMENT_1ST      6       /* '/', when it has no other class */
  63 #define LEX_IS_STRINGQUOTE              8       /* '"'; '\'' in MRI mode or with SINGLE_QUOTE_STRINGS */
  64 #define LEX_IS_COLON                    9       /* ':' */
  65 #define LEX_IS_NEWLINE                  10      /* '\n' */
  66 #define LEX_IS_ONECHAR_QUOTE            11      /* '\'' outside MRI mode, unless TC_HPPA */
  67 #ifdef TC_V850
  68 #define LEX_IS_DOUBLEDASH_1ST           12      /* '-' */
  69 #endif
  70 #ifdef TC_M32R
  71 #define LEX_IS_DOUBLEBAR_1ST            13      /* '|' */
  72 #endif
  73 #define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)     /* These predicates index lex directly, so C must lie in 0..255 (never EOF).  */
  74 #define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
  75 #define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
  76 #define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
  77 #define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
  78 #define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  79
  80 static int process_escape PARAMS ((int));
  81
  82 /* FIXME-soon: The entire lexer/parser thingy should be
  83    built statically at compile time rather than dynamically
  84    each and every time the assembler is run.  xoxorich. */
  85
  86 void
  87 do_scrub_begin (m68k_mri)
  88      int m68k_mri;
  89 {
  90   const char *p;
  91   int c;
  92
  93   scrub_m68k_mri = m68k_mri;
  94
  95   lex[' '] = LEX_IS_WHITESPACE;
  96   lex['\t'] = LEX_IS_WHITESPACE;
  97   lex['\r'] = LEX_IS_WHITESPACE;
  98   lex['\n'] = LEX_IS_NEWLINE;
  99   lex[';'] = LEX_IS_LINE_SEPARATOR;
 100   lex[':'] = LEX_IS_COLON;
 101
 102   if (! m68k_mri)
 103     {
 104       lex['"'] = LEX_IS_STRINGQUOTE;
 105
 106 #ifndef TC_HPPA
 107       lex['\''] = LEX_IS_ONECHAR_QUOTE;
 108 #endif
 109
 110 #ifdef SINGLE_QUOTE_STRINGS
 111       lex['\''] = LEX_IS_STRINGQUOTE;
 112 #endif
 113     }
 114
 115   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
 116      in state 5 of do_scrub_chars must be changed.  */
 117
 118   /* Note that these override the previous defaults, e.g. if ';' is a
 119      comment char, then it isn't a line separator.  */
 120   for (p = symbol_chars; *p; ++p)
 121     {
 122       lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
 123     }                           /* declare symbol characters */
 124
 125   for (c = 128; c < 256; ++c)
 126     lex[c] = LEX_IS_SYMBOL_COMPONENT;
 127
 128 #ifdef tc_symbol_chars
 129   /* This macro permits the processor to specify all characters which
 130      may appears in an operand.  This will prevent the scrubber from
 131      discarding meaningful whitespace in certain cases.  The i386
 132      backend uses this to support prefixes, which can confuse the
 133      scrubber as to whether it is parsing operands or opcodes.  */
 134   for (p = tc_symbol_chars; *p; ++p)
 135     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
 136 #endif
 137
 138   /* The m68k backend wants to be able to change comment_chars.  */
 139 #ifndef tc_comment_chars
 140 #define tc_comment_chars comment_chars
 141 #endif
 142   for (p = tc_comment_chars; *p; p++)
 143     {
 144       lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
 145     }                           /* declare comment chars */
 146
 147   for (p = line_comment_chars; *p; p++)
 148     {
 149       lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
 150     }                           /* declare line comment chars */
 151
 152   for (p = line_separator_chars; *p; p++)
 153     {
 154       lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
 155     }                           /* declare line separators */
 156
 157   /* Only allow slash-star comments if slash is not in use.
 158      FIXME: This isn't right.  We should always permit them.  */
 159   if (lex['/'] == 0)
 160     {
 161       lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 162     }
 163
 164   if (m68k_mri)
 165     {
 166       lex['\''] = LEX_IS_STRINGQUOTE;
 167       lex[';'] = LEX_IS_COMMENT_START;
 168       lex['*'] = LEX_IS_LINE_COMMENT_START;
 169       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
 170          then it can't be used in an expression.  */
 171       lex['!'] = LEX_IS_LINE_COMMENT_START;
 172     }
 173
 174 #ifdef TC_V850
 175   lex['-'] = LEX_IS_DOUBLEDASH_1ST;
 176 #endif
 177 #ifdef TC_M32R
 178   lex['|'] = LEX_IS_DOUBLEBAR_1ST;
 179 #endif
 180 #ifdef TC_D30V
 181   /* must do this is we want VLIW instruction with "->" or "<-" */
 182   lex['-'] = LEX_IS_SYMBOL_COMPONENT;
 183 #endif
 184 }                               /* do_scrub_begin() */
 185
 186 /* Saved state of the scrubber.  do_scrub_chars may return at any point, so what it needs in order to resume is kept in these variables.  */
 187 static int state;                       /* Current state; see the table in do_scrub_chars.  */
 188 static int old_state;                   /* State to go to when state 5, -1 or -2 finishes.  */
 189 static char *out_string;                /* Text still to be output while in state -1.  */
 190 static char out_buf[20];                /* Decimal text produced for a one-character quote constant.  */
 191 static int add_newlines;                /* Newlines consumed inside comments and continued strings.  */
 192 static char *saved_input;               /* Input to resume from on entry to do_scrub_chars, or NULL.  */
 193 static int saved_input_len;             /* Number of bytes at saved_input.  */
 194 static const char *mri_state;           /* Next character of mri_pseudo to match, or NULL when no match is in progress.  */
 195 static char mri_last_ch;                /* Last character matched by the .mri recognizer.  */
 196
 197 /* Data structure for saving the state of app across #include's.  Note that
 198    app is called asynchronously to the parsing of the .include's, so our
 199    state at the time .include is interpreted is completely unrelated.
 200    That's why we have to save it all.  app_push fills one of these in and app_pop restores from it and frees it.  */
 201
 202 struct app_save                 /* Each member holds the file-scope variable of the same name.  */
 203   {
 204     int          state;
 205     int          old_state;
 206     char *       out_string;        /* May point into the global out_buf, whose contents are saved below.  */
 207     char         out_buf[sizeof (out_buf)];     /* Saved by value with memcpy.  */
 208     int          add_newlines;
 209     char *       saved_input;
 210     int          saved_input_len;
 211     int          scrub_m68k_mri;
 212     const char * mri_state;
 213     char         mri_last_ch;
 214 #if defined TC_ARM && defined OBJ_ELF
 215     const char * symver_state;
 216 #endif
 217   };
 218
 219 char *
 220 app_push ()
 221 {
 222   register struct app_save *saved;
 223
 224   saved = (struct app_save *) xmalloc (sizeof (*saved));
 225   saved->state = state;
 226   saved->old_state = old_state;
 227   saved->out_string = out_string;
 228   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
 229   saved->add_newlines = add_newlines;
 230   saved->saved_input = saved_input;
 231   saved->saved_input_len = saved_input_len;
 232   saved->scrub_m68k_mri = scrub_m68k_mri;
 233   saved->mri_state = mri_state;
 234   saved->mri_last_ch = mri_last_ch;
 235 #if defined TC_ARM && defined OBJ_ELF
 236   saved->symver_state = symver_state;
 237 #endif
 238
 239   /* do_scrub_begin() is not useful, just wastes time. */
 240
 241   state = 0;
 242   saved_input = NULL;
 243
 244   return (char *) saved;
 245 }
 246
 247 void
 248 app_pop (arg)
 249      char *arg;
 250 {
 251   register struct app_save *saved = (struct app_save *) arg;
 252
 253   /* There is no do_scrub_end (). */
 254   state = saved->state;
 255   old_state = saved->old_state;
 256   out_string = saved->out_string;
 257   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
 258   add_newlines = saved->add_newlines;
 259   saved_input = saved->saved_input;
 260   saved_input_len = saved->saved_input_len;
 261   scrub_m68k_mri = saved->scrub_m68k_mri;
 262   mri_state = saved->mri_state;
 263   mri_last_ch = saved->mri_last_ch;
 264 #if defined TC_ARM && defined OBJ_ELF
 265   symver_state = saved->symver_state;
 266 #endif
 267
 268   free (arg);
 269 }                               /* app_pop() */
 270
 /* Return the character denoted by a backslash followed by CH.  The
    letters b, f, n, r and t give the corresponding control character;
    every other character, including the two quote characters, stands
    for itself.

    @@ This assumes that \n and friends have the same values on the host
    and on the target, which is not necessarily true.  */
 static int
 process_escape (ch)
      int ch;
 {
   /* Escape letters paired with the characters they stand for.  */
   static const struct
   {
     char letter;
     char value;
   }
   escapes[] =
   {
     { 'b', '\b' },
     { 'f', '\f' },
     { 'n', '\n' },
     { 'r', '\r' },
     { 't', '\t' }
   };
   unsigned int i;

   for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); ++i)
     if (ch == escapes[i].letter)
       return escapes[i].value;

   /* Anything else, quotes included, is taken literally.  */
   return ch;
 }
 297
 298 /* This function is called to process input characters.  The GET
 299    parameter is used to retrieve more input characters.  GET should
 300    set its parameter to point to a buffer, and return the length of
 301    the buffer; it should return 0 at end of file.  The scrubbed output
 302    characters are put into the buffer starting at TOSTART; the TOSTART
 303    buffer is TOLEN bytes in length.  The function returns the number
 304    of scrubbed characters put into TOSTART.  This will be TOLEN unless
 305    end of file was seen.  This function is arranged as a state
 306    machine, and saves its state so that it may return at any point.
 307    This is the way the old code used to work.  */
 308
 309 int
 310 do_scrub_chars (get, tostart, tolen)
 311      int (*get) PARAMS ((char **));
 312      char *tostart;
 313      int tolen;
 314 {
 315   char *to = tostart;
 316   char *toend = tostart + tolen;
 317   char *from;
 318   char *fromend;
 319   int fromlen;
 320   register int ch, ch2 = 0;
 321
 322   /*State 0: beginning of normal line
 323           1: After first whitespace on line (flush more white)
 324           2: After first non-white (opcode) on line (keep 1white)
 325           3: after second white on line (into operands) (flush white)
 326           4: after putting out a .line, put out digits
 327           5: parsing a string, then go to old-state
 328           6: putting out \ escape in a "d string.
 329           7: After putting out a .appfile, put out string.
 330           8: After putting out a .appfile string, flush until newline.
 331           9: After seeing symbol char in state 3 (keep 1white after symchar)
 332          10: After seeing whitespace in state 9 (keep white before symchar)
 333          11: After seeing a symbol character in state 0 (eg a label definition)
 334          -1: output string in out_string and go to the state in old_state
 335          -2: flush text until a '*' '/' is seen, then go to state old_state
 336 #ifdef TC_V850
 337          12: After seeing a dash, looking for a second dash as a start of comment.
 338 #endif
 339 #ifdef TC_M32R
 340          13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression separator.
 341 #endif
 342           */
 343
 344   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
 345      constructs like ``.loc 1 20''.  This was turning into ``.loc
 346      120''.  States 9 and 10 ensure that a space is never dropped in
 347      between characters which could appear in a identifier.  Ian
 348      Taylor, ian@cygnus.com.
 349
 350      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
 351      correctly on the PA (and any other target where colons are optional).
 352      Jeff Law, law@cs.utah.edu.
 353
 354      I added state 13 so that something like "cmp r1, r2 || trap #1" does not
 355      get squashed into "cmp r1,r2||trap#1", with the all important space
 356      between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
 357
 358   /* This macro gets the next input character.  */
 359
 360 #define GET()                           \
 361   (from < fromend                       \
 362    ? * (unsigned char *) (from++)       \
 363    : ((saved_input != NULL              \
 364        ? (free (saved_input),           \
 365           saved_input = NULL,           \
 366           0)                            \
 367        : 0),                            \
 368       fromlen = (*get) (&from),         \
 369       fromend = from + fromlen,         \
 370       (fromlen == 0                     \
 371        ? EOF                            \
 372        : * (unsigned char *) (from++))))
 373
 374   /* This macro pushes a character back on the input stream.  */
 375
 376 #define UNGET(uch) (*--from = (uch))
 377
 378   /* This macro puts a character into the output buffer.  If this
 379      character fills the output buffer, this macro jumps to the label
 380      TOFULL.  We use this rather ugly approach because we need to
 381      handle two different termination conditions: EOF on the input
 382      stream, and a full output buffer.  It would be simpler if we
 383      always read in the entire input stream before processing it, but
 384      I don't want to make such a significant change to the assembler's
 385      memory usage.  */
 386
 387 #define PUT(pch)                        \
 388   do                                    \
 389     {                                   \
 390       *to++ = (pch);                    \
 391       if (to >= toend)                  \
 392         goto tofull;                    \
 393     }                                   \
 394   while (0)
 395
 396   if (saved_input != NULL)
 397     {
 398       from = saved_input;
 399       fromend = from + saved_input_len;
 400     }
 401   else
 402     {
 403       fromlen = (*get) (&from);
 404       if (fromlen == 0)
 405         return 0;
 406       fromend = from + fromlen;
 407     }
 408
 409   while (1)
 410     {
 411       /* The cases in this switch end with continue, in order to
 412          branch back to the top of this while loop and generate the
 413          next output character in the appropriate state.  */
 414       switch (state)
 415         {
 416         case -1:
 417           ch = *out_string++;
 418           if (*out_string == '\0')
 419             {
 420               state = old_state;
 421               old_state = 3;
 422             }
 423           PUT (ch);
 424           continue;
 425
 426         case -2:
 427           for (;;)
 428             {
 429               do
 430                 {
 431                   ch = GET ();
 432
 433                   if (ch == EOF)
 434                     {
 435                       as_warn (_("end of file in comment"));
 436                       goto fromeof;
 437                     }
 438
 439                   if (ch == '\n')
 440                     PUT ('\n');
 441                 }
 442               while (ch != '*');
 443
 444               while ((ch = GET ()) == '*')
 445                 ;
 446
 447               if (ch == EOF)
 448                 {
 449                   as_warn (_("end of file in comment"));
 450                   goto fromeof;
 451                 }
 452
 453               if (ch == '/')
 454                 break;
 455
 456               UNGET (ch);
 457             }
 458
 459           state = old_state;
 460           UNGET (' ');
 461           continue;
 462
 463         case 4:
 464           ch = GET ();
 465           if (ch == EOF)
 466             goto fromeof;
 467           else if (ch >= '0' && ch <= '9')
 468             PUT (ch);
 469           else
 470             {
 471               while (ch != EOF && IS_WHITESPACE (ch))
 472                 ch = GET ();
 473               if (ch == '"')
 474                 {
 475                   UNGET (ch);
 476                   if (scrub_m68k_mri)
 477                     out_string = "\n\tappfile ";
 478                   else
 479                     out_string = "\n\t.appfile ";
 480                   old_state = 7;
 481                   state = -1;
 482                   PUT (*out_string++);
 483                 }
 484               else
 485                 {
 486                   while (ch != EOF && ch != '\n')
 487                     ch = GET ();
 488                   state = 0;
 489                   PUT (ch);
 490                 }
 491             }
 492           continue;
 493
 494         case 5:
 495           /* We are going to copy everything up to a quote character,
 496              with special handling for a backslash.  We try to
 497              optimize the copying in the simple case without using the
 498              GET and PUT macros.  */
 499           {
 500             char *s;
 501             int len;
 502
 503             for (s = from; s < fromend; s++)
 504               {
 505                 ch = *s;
 506                 /* This condition must be changed if the type of any
 507                    other character can be LEX_IS_STRINGQUOTE.  */
 508                 if (ch == '\\'
 509                     || ch == '"'
 510                     || ch == '\''
 511                     || ch == '\n')
 512                   break;
 513               }
 514             len = s - from;
 515             if (len > toend - to)
 516               len = toend - to;
 517             if (len > 0)
 518               {
 519                 memcpy (to, from, len);
 520                 to += len;
 521                 from += len;
 522               }
 523           }
 524
 525           ch = GET ();
 526           if (ch == EOF)
 527             {
 528               as_warn (_("end of file in string: inserted '\"'"));
 529               state = old_state;
 530               UNGET ('\n');
 531               PUT ('"');
 532             }
 533           else if (lex[ch] == LEX_IS_STRINGQUOTE)
 534             {
 535               state = old_state;
 536               PUT (ch);
 537             }
 538 #ifndef NO_STRING_ESCAPES
 539           else if (ch == '\\')
 540             {
 541               state = 6;
 542               PUT (ch);
 543             }
 544 #endif
 545           else if (scrub_m68k_mri && ch == '\n')
 546             {
 547               /* Just quietly terminate the string.  This permits lines like
 548                    bne  label   loop if we haven't reach end yet
 549                  */
 550               state = old_state;
 551               UNGET (ch);
 552               PUT ('\'');
 553             }
 554           else
 555             {
 556               PUT (ch);
 557             }
 558           continue;
 559
 560         case 6:
 561           state = 5;
 562           ch = GET ();
 563           switch (ch)
 564             {
 565               /* Handle strings broken across lines, by turning '\n' into
 566                  '\\' and 'n'.  */
 567             case '\n':
 568               UNGET ('n');
 569               add_newlines++;
 570               PUT ('\\');
 571               continue;
 572
 573             case '"':
 574             case '\\':
 575             case 'b':
 576             case 'f':
 577             case 'n':
 578             case 'r':
 579             case 't':
 580             case 'v':
 581             case 'x':
 582             case 'X':
 583             case '0':
 584             case '1':
 585             case '2':
 586             case '3':
 587             case '4':
 588             case '5':
 589             case '6':
 590             case '7':
 591               break;
 592 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 593             default:
 594               as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
 595               break;
 596 #else  /* ONLY_STANDARD_ESCAPES */
 597             default:
 598               /* Accept \x as x for any x */
 599               break;
 600 #endif /* ONLY_STANDARD_ESCAPES */
 601
 602             case EOF:
 603               as_warn (_("End of file in string: '\"' inserted"));
 604               PUT ('"');
 605               continue;
 606             }
 607           PUT (ch);
 608           continue;
 609
 610         case 7:
 611           ch = GET ();
 612           state = 5;
 613           old_state = 8;
 614           if (ch == EOF)
 615             goto fromeof;
 616           PUT (ch);
 617           continue;
 618
 619         case 8:
 620           do
 621             ch = GET ();
 622           while (ch != '\n' && ch != EOF);
 623           if (ch == EOF)
 624             goto fromeof;
 625           state = 0;
 626           PUT (ch);
 627           continue;
 628         }
 629
 630       /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
 631
 632       /* flushchar: */
 633       ch = GET ();
 634
 635     recycle:
 636
 637 #if defined TC_ARM && defined OBJ_ELF
 638       /* We need to watch out for .symver directives.  See the comment later
 639          in this function.  */
 640       if (symver_state == NULL)
 641         {
 642           if ((state == 0 || state == 1) && ch == symver_pseudo[0])
 643             symver_state = symver_pseudo + 1;
 644         }
 645       else
 646         {
 647           /* We advance to the next state if we find the right
 648              character.  */
 649           if (ch != '\0' && (*symver_state == ch))
 650             ++symver_state;
 651           else if (*symver_state != '\0')
 652             /* We did not get the expected character, or we didn't
 653                get a valid terminating character after seeing the
 654                entire pseudo-op, so we must go back to the beginning.  */
 655             symver_state = NULL;
 656           else
 657             {
 658               /* We've read the entire pseudo-op.  If this is the end
 659                  of the line, go back to the beginning.  */
 660               if (IS_NEWLINE (ch))
 661                 symver_state = NULL;
 662             }
 663         }
 664 #endif /* TC_ARM && OBJ_ELF */
 665
 666 #ifdef TC_M68K
 667       /* We want to have pseudo-ops which control whether we are in
 668          MRI mode or not.  Unfortunately, since m68k MRI mode affects
 669          the scrubber, that means that we need a special purpose
 670          recognizer here.  */
 671       if (mri_state == NULL)
 672         {
 673           if ((state == 0 || state == 1)
 674               && ch == mri_pseudo[0])
 675             mri_state = mri_pseudo + 1;
 676         }
 677       else
 678         {
 679           /* We advance to the next state if we find the right
 680              character, or if we need a space character and we get any
 681              whitespace character, or if we need a '0' and we get a
 682              '1' (this is so that we only need one state to handle
 683              ``.mri 0'' and ``.mri 1'').  */
 684           if (ch != '\0'
 685               && (*mri_state == ch
 686                   || (*mri_state == ' '
 687                       && lex[ch] == LEX_IS_WHITESPACE)
 688                   || (*mri_state == '0'
 689                       && ch == '1')))
 690             {
 691               mri_last_ch = ch;
 692               ++mri_state;
 693             }
 694           else if (*mri_state != '\0'
 695                    || (lex[ch] != LEX_IS_WHITESPACE
 696                        && lex[ch] != LEX_IS_NEWLINE))
 697             {
 698               /* We did not get the expected character, or we didn't
 699                  get a valid terminating character after seeing the
 700                  entire pseudo-op, so we must go back to the
 701                  beginning.  */
 702               mri_state = NULL;
 703             }
 704           else
 705             {
 706               /* We've read the entire pseudo-op.  mri_last_ch is
 707                  either '0' or '1' indicating whether to enter or
 708                  leave MRI mode.  */
 709               do_scrub_begin (mri_last_ch == '1');
 710               mri_state = NULL;
 711
 712               /* We continue handling the character as usual.  The
 713                  main gas reader must also handle the .mri pseudo-op
 714                  to control expression parsing and the like.  */
 715             }
 716         }
 717 #endif
 718
 719       if (ch == EOF)
 720         {
 721           if (state != 0)
 722             {
 723               as_warn (_("end of file not at end of a line; newline inserted"));
 724               state = 0;
 725               PUT ('\n');
 726             }
 727           goto fromeof;
 728         }
 729
 730       switch (lex[ch])
 731         {
 732         case LEX_IS_WHITESPACE:
 733           do
 734             {
 735               ch = GET ();
 736             }
 737           while (ch != EOF && IS_WHITESPACE (ch));
 738           if (ch == EOF)
 739             goto fromeof;
 740
 741           if (state == 0)
 742             {
 743               /* Preserve a single whitespace character at the
 744                  beginning of a line.  */
 745               state = 1;
 746               UNGET (ch);
 747               PUT (' ');
 748               break;
 749             }
 750
 751           if (IS_COMMENT (ch)
 752               || ch == '/'
 753               || IS_LINE_SEPARATOR (ch))
 754             {
 755               if (scrub_m68k_mri)
 756                 {
 757                   /* In MRI mode, we keep these spaces.  */
 758                   UNGET (ch);
 759                   PUT (' ');
 760                   break;
 761                 }
 762               goto recycle;
 763             }
 764
 765           /* If we're in state 2 or 11, we've seen a non-white
 766              character followed by whitespace.  If the next character
 767              is ':', this is whitespace after a label name which we
 768              normally must ignore.  In MRI mode, though, spaces are
 769              not permitted between the label and the colon.  */
 770           if ((state == 2 || state == 11)
 771               && lex[ch] == LEX_IS_COLON
 772               && ! scrub_m68k_mri)
 773             {
 774               state = 1;
 775               PUT (ch);
 776               break;
 777             }
 778
 779           switch (state)
 780             {
 781             case 0:
 782               state++;
 783               goto recycle;     /* Punted leading sp */
 784             case 1:
 785               /* We can arrive here if we leave a leading whitespace
 786                  character at the beginning of a line.  */
 787               goto recycle;
 788             case 2:
 789               state = 3;
 790               if (to + 1 < toend)
 791                 {
 792                   /* Optimize common case by skipping UNGET/GET.  */
 793                   PUT (' ');    /* Sp after opco */
 794                   goto recycle;
 795                 }
 796               UNGET (ch);
 797               PUT (' ');
 798               break;
 799             case 3:
 800               if (scrub_m68k_mri)
 801                 {
 802                   /* In MRI mode, we keep these spaces.  */
 803                   UNGET (ch);
 804                   PUT (' ');
 805                   break;
 806                 }
 807               goto recycle;     /* Sp in operands */
 808             case 9:
 809             case 10:
 810               if (scrub_m68k_mri)
 811                 {
 812                   /* In MRI mode, we keep these spaces.  */
 813                   state = 3;
 814                   UNGET (ch);
 815                   PUT (' ');
 816                   break;
 817                 }
 818               state = 10;       /* Sp after symbol char */
 819               goto recycle;
 820             case 11:
 821               if (flag_m68k_mri
 822 #ifdef LABELS_WITHOUT_COLONS
 823                   || 1
 824 #endif
 825                   )
 826                 state = 1;
 827               else
 828                 {
 829                   /* We know that ch is not ':', since we tested that
 830                      case above.  Therefore this is not a label, so it
 831                      must be the opcode, and we've just seen the
 832                      whitespace after it.  */
 833                   state = 3;
 834                 }
 835               UNGET (ch);
 836               PUT (' ');        /* Sp after label definition.  */
 837               break;
 838             default:
 839               BAD_CASE (state);
 840             }
 841           break;
 842
 843         case LEX_IS_TWOCHAR_COMMENT_1ST:
 844           ch2 = GET ();
 845           if (ch2 == '*')
 846             {
 847               for (;;)
 848                 {
 849                   do
 850                     {
 851                       ch2 = GET ();
 852                       if (ch2 != EOF && IS_NEWLINE (ch2))
 853                         add_newlines++;
 854                     }
 855                   while (ch2 != EOF && ch2 != '*');
 856
 857                   while (ch2 == '*')
 858                     ch2 = GET ();
 859
 860                   if (ch2 == EOF || ch2 == '/')
 861                     break;
 862
 863                   /* This UNGET will ensure that we count newlines
 864                      correctly.  */
 865                   UNGET (ch2);
 866                 }
 867
 868               if (ch2 == EOF)
 869                 as_warn (_("end of file in multiline comment"));
 870
 871               ch = ' ';
 872               goto recycle;
 873             }
 874           else
 875             {
 876               if (ch2 != EOF)
 877                 UNGET (ch2);
 878               if (state == 9 || state == 10)
 879                 state = 3;
 880               PUT (ch);
 881             }
 882           break;
 883
 884         case LEX_IS_STRINGQUOTE:
 885           if (state == 10)
 886             {
 887               /* Preserve the whitespace in foo "bar" */
 888               UNGET (ch);
 889               state = 3;
 890               PUT (' ');
 891
 892               /* PUT didn't jump out.  We could just break, but we
 893                  know what will happen, so optimize a bit.  */
 894               ch = GET ();
 895               old_state = 3;
 896             }
 897           else if (state == 9)
 898             old_state = 3;
 899           else
 900             old_state = state;
 901           state = 5;
 902           PUT (ch);
 903           break;
 904
 905 #ifndef IEEE_STYLE
 906         case LEX_IS_ONECHAR_QUOTE:
 907           if (state == 10)
 908             {
 909               /* Preserve the whitespace in foo 'b' */
 910               UNGET (ch);
 911               state = 3;
 912               PUT (' ');
 913               break;
 914             }
 915           ch = GET ();
 916           if (ch == EOF)
 917             {
 918               as_warn (_("end of file after a one-character quote; \\0 inserted"));
 919               ch = 0;
 920             }
 921           if (ch == '\\')
 922             {
 923               ch = GET ();
 924               if (ch == EOF)
 925                 {
 926                   as_warn (_("end of file in escape character"));
 927                   ch = '\\';
 928                 }
 929               else
 930                 ch = process_escape (ch);
 931             }
 932           sprintf (out_buf, "%d", (int) (unsigned char) ch);
 933
 934           /* None of these 'x constants for us.  We want 'x'.  */
 935           if ((ch = GET ()) != '\'')
 936             {
 937 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 938               as_warn (_("Missing close quote: (assumed)"));
 939 #else
 940               if (ch != EOF)
 941                 UNGET (ch);
 942 #endif
 943             }
 944           if (strlen (out_buf) == 1)
 945             {
 946               PUT (out_buf[0]);
 947               break;
 948             }
 949           if (state == 9)
 950             old_state = 3;
 951           else
 952             old_state = state;
 953           state = -1;
 954           out_string = out_buf;
 955           PUT (*out_string++);
 956           break;
 957 #endif
 958
 959         case LEX_IS_COLON:
 960           if (state == 9 || state == 10)
 961             state = 3;
 962           else if (state != 3)
 963             state = 1;
 964           PUT (ch);
 965           break;
 966
 967         case LEX_IS_NEWLINE:
 968           /* Roll out a bunch of newlines from inside comments, etc.  */
 969           if (add_newlines)
 970             {
 971               --add_newlines;
 972               UNGET (ch);
 973             }
 974           /* fall thru into... */
 975
 976         case LEX_IS_LINE_SEPARATOR:
 977           state = 0;
 978           PUT (ch);
 979           break;
 980
 981 #ifdef TC_V850
 982         case LEX_IS_DOUBLEDASH_1ST:
 983           ch2 = GET();
 984           if (ch2 != '-')
 985             {
 986               UNGET (ch2);
 987               goto de_fault;
 988             }
 989           /* read and skip to end of line */
 990           do
 991             {
 992               ch = GET ();
 993             }
 994           while (ch != EOF && ch != '\n');
 995           if (ch == EOF)
 996             {
 997               as_warn (_("end of file in comment; newline inserted"));
 998             }
 999           state = 0;
1000           PUT ('\n');
1001           break;
1002 #endif
1003 #ifdef TC_M32R
1004         case LEX_IS_DOUBLEBAR_1ST:
1005           ch2 = GET();
1006           if (ch2 != '|')
1007             {
1008               UNGET (ch2);
1009               goto de_fault;
1010             }
1011           /* Reset back to state 1 and pretend that we are parsing a line from
1012              just after the first white space.  */
1013           state = 1;
1014           PUT ('|');
1015           PUT ('|');
1016           break;
1017 #endif
1018         case LEX_IS_LINE_COMMENT_START:
1019           /* FIXME-someday: The two character comment stuff was badly
1020              thought out.  On i386, we want '/' as line comment start
1021              AND we want C style comments.  hence this hack.  The
1022              whole lexical process should be reworked.  xoxorich.  */
1023           if (ch == '/')
1024             {
1025               ch2 = GET ();
1026               if (ch2 == '*')
1027                 {
1028                   old_state = 3;
1029                   state = -2;
1030                   break;
1031                 }
1032               else
1033                 {
1034                   UNGET (ch2);
1035                 }
1036             } /* bad hack */
1037
1038           if (state == 0 || state == 1) /* Only comment at start of line.  */
1039             {
1040               int startch;
1041
1042               startch = ch;
1043
1044               do
1045                 {
1046                   ch = GET ();
1047                 }
1048               while (ch != EOF && IS_WHITESPACE (ch));
1049               if (ch == EOF)
1050                 {
1051                   as_warn (_("end of file in comment; newline inserted"));
1052                   PUT ('\n');
1053                   break;
1054                 }
1055               if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1056                 {
1057                   /* Not a cpp line.  */
1058                   while (ch != EOF && !IS_NEWLINE (ch))
1059                     ch = GET ();
1060                   if (ch == EOF)
1061                     as_warn (_("EOF in Comment: Newline inserted"));
1062                   state = 0;
1063                   PUT ('\n');
1064                   break;
1065                 }
1066               /* Looks like `# 123 "filename"' from cpp.  */
1067               UNGET (ch);
1068               old_state = 4;
1069               state = -1;
1070               if (scrub_m68k_mri)
1071                 out_string = "\tappline ";
1072               else
1073                 out_string = "\t.appline ";
1074               PUT (*out_string++);
1075               break;
1076             }
1077
1078 #ifdef TC_D10V
1079           /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1080              Trap is the only short insn that has a first operand that is
1081              neither register nor label.
1082              We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1083              We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is already
1084              LEX_IS_LINE_COMMENT_START.  However, it is the only character in
1085              line_comment_chars for d10v, hence we can recognize it as such.  */
1086           /* An alternative approach would be to reset the state to 1 when
1087              we see '||', '<'- or '->', but that seems to be overkill.  */
1088           if (state == 10) PUT (' ');
1089 #endif
1090           /* We have a line comment character which is not at the
1091              start of a line.  If this is also a normal comment
1092              character, fall through.  Otherwise treat it as a default
1093              character.  */
1094           if (strchr (tc_comment_chars, ch) == NULL
1095               && (! scrub_m68k_mri
1096                   || (ch != '!' && ch != '*')))
1097             goto de_fault;
1098           if (scrub_m68k_mri
1099               && (ch == '!' || ch == '*' || ch == '#')
1100               && state != 1
1101               && state != 10)
1102             goto de_fault;
1103           /* Fall through.  */
1104         case LEX_IS_COMMENT_START:
1105 #if defined TC_ARM && defined OBJ_ELF
1106           /* On the ARM, `@' is the comment character.
1107              Unfortunately this is also a special character in ELF .symver
1108              directives (and .type, though we deal with those another way).  So
1109              we check if this line is such a directive, and treat the character
1110              as default if so.  This is a hack.  */
1111           if ((symver_state != NULL) && (*symver_state == 0))
1112             goto de_fault;
1113 #endif
1114           do
1115             {
1116               ch = GET ();
1117             }
1118           while (ch != EOF && !IS_NEWLINE (ch));
1119           if (ch == EOF)
1120             as_warn (_("end of file in comment; newline inserted"));
1121           state = 0;
1122           PUT ('\n');
1123           break;
1124
1125         case LEX_IS_SYMBOL_COMPONENT:
1126           if (state == 10)
1127             {
1128               /* This is a symbol character following another symbol
1129                  character, with whitespace in between.  We skipped
1130                  the whitespace earlier, so output it now.  */
1131               UNGET (ch);
1132               state = 3;
1133               PUT (' ');
1134               break;
1135             }
1136
1137           if (state == 3)
1138             state = 9;
1139
1140           /* This is a common case.  Quickly copy CH and all the
1141              following symbol component or normal characters.  */
1142           if (to + 1 < toend
1143               && mri_state == NULL
1144 #if defined TC_ARM && defined OBJ_ELF
1145               && symver_state == NULL
1146 #endif
1147               )
1148             {
1149               char *s;
1150               int len;
1151
1152               for (s = from; s < fromend; s++)
1153                 {
1154                   int type;
1155
1156                   ch2 = * (unsigned char *) s;
1157                   type = lex[ch2];
1158                   if (type != 0
1159                       && type != LEX_IS_SYMBOL_COMPONENT)
1160                     break;
1161                 }
1162               if (s > from)
1163                 {
1164                   /* Handle the last character normally, for
1165                      simplicity.  */
1166                   --s;
1167                 }
1168               len = s - from;
1169               if (len > (toend - to) - 1)
1170                 len = (toend - to) - 1;
1171               if (len > 0)
1172                 {
1173                   PUT (ch);
1174                   if (len > 8)
1175                     {
1176                       memcpy (to, from, len);
1177                       to += len;
1178                       from += len;
1179                     }
1180                   else
1181                     {
1182                       switch (len)
1183                         {
1184                         case 8: *to++ = *from++;
1185                         case 7: *to++ = *from++;
1186                         case 6: *to++ = *from++;
1187                         case 5: *to++ = *from++;
1188                         case 4: *to++ = *from++;
1189                         case 3: *to++ = *from++;
1190                         case 2: *to++ = *from++;
1191                         case 1: *to++ = *from++;
1192                         }
1193                     }
1194                   ch = GET ();
1195                 }
1196             }
1197
1198           /* Fall through.  */
1199         default:
1200         de_fault:
1201           /* Some relatively `normal' character.  */
1202           if (state == 0)
1203             {
1204               state = 11;       /* Now seeing label definition */
1205             }
1206           else if (state == 1)
1207             {
1208               state = 2;        /* Ditto */
1209             }
1210           else if (state == 9)
1211             {
1212               if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
1213                 state = 3;
1214             }
1215           else if (state == 10)
1216             {
1217               state = 3;
1218             }
1219           PUT (ch);
1220           break;
1221         }
1222     }
1223
1224   /*NOTREACHED*/
1225
1226  fromeof:
1227   /* We have reached the end of the input.  */
1228   return to - tostart;
1229
1230  tofull:
1231   /* The output buffer is full.  Save any input we have not yet
1232      processed.  */
1233   if (fromend > from)
1234     {
1235       char *save;
1236
1237       save = (char *) xmalloc (fromend - from);
1238       memcpy (save, from, fromend - from);
1239       if (saved_input != NULL)
1240         free (saved_input);
1241       saved_input = save;
1242       saved_input_len = fromend - from;
1243     }
1244   else
1245     {
1246       if (saved_input != NULL)
1247         {
1248           free (saved_input);
1249           saved_input = NULL;
1250         }
1251     }
1252   return to - tostart;
1253 }
1254
1255 /* end of app.c */