[PATCH 16/57][Arm][GAS] Add support for MVE instructions: vdup, vddup, vdwdup, vidup...
[binutils-gdb.git] / gas / app.c
blob01b90d10bdf42a9e1010a069e0c2ad4e5b3a03f0
1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987-2019 Free Software Foundation, Inc.
4 This file is part of GAS, the GNU Assembler.
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GAS is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
22 /* App, the assembler pre-processor. This pre-processor strips out
23 excess spaces, turns single-quoted characters into a decimal
24 constant, and turns the # in # <number> <filename> <garbage> into a
25 .linefile. This needs better error-handling. */
27 #include "as.h"
/* Compilers that do not claim ISO C conformance may lack `const';
   make the keyword vanish for them.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef H_TICK_HEX
/* When nonzero, do_scrub_begin classifies 'h' and 'H' as LEX_IS_H so
   that do_scrub_chars can rewrite H'xx constants as 0xxx.  */
int enable_h_tick_hex = 0;
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Without m68k support MRI scrubbing is compiled out entirely.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";
/* Progress through symver_pseudo while matching it on the current
   line; NULL when no match is in progress.  */
static const char * symver_state;
#endif
#ifdef TC_ARM
/* Last character emitted by the previous do_scrub_chars call; consulted
   when the output buffer is empty to recognise a preceding backslash.  */
static char last_char;
#endif
/* Character classification table filled in by do_scrub_begin.  It is
   indexed by the character's value as an unsigned char; an entry of
   zero means "no special class".  */
static char lex[256];
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14
#ifdef H_TICK_HEX
#define LEX_IS_H			15
#endif

/* Look up the class of C.  The index is forced through unsigned char so
   that a negative argument (EOF, or a plain `char' with the top bit set
   on hosts where char is signed) can never read outside the table.
   Callers that must tell EOF apart from a real character still have to
   test for EOF themselves: after the conversion EOF shares an entry
   with character 255.  */
#define LEX_CLASS(c)		(lex[(unsigned char) (c)])

#define IS_SYMBOL_COMPONENT(c)	   (LEX_CLASS (c) == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)	   (LEX_CLASS (c) == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)	   (LEX_CLASS (c) == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)   (LEX_CLASS (c) == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)		   (LEX_CLASS (c) == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)	   (LEX_CLASS (c) == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)		   (LEX_CLASS (c) == LEX_IS_NEWLINE)
97 static int process_escape (int);
99 /* FIXME-soon: The entire lexer/parser thingy should be
100 built statically at compile time rather than dynamically
101 each and every time the assembler is run. xoxorich. */
103 void
104 do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
106 const char *p;
107 int c;
109 lex[' '] = LEX_IS_WHITESPACE;
110 lex['\t'] = LEX_IS_WHITESPACE;
111 lex['\r'] = LEX_IS_WHITESPACE;
112 lex['\n'] = LEX_IS_NEWLINE;
113 lex[':'] = LEX_IS_COLON;
115 #ifdef TC_M68K
116 scrub_m68k_mri = m68k_mri;
118 if (! m68k_mri)
119 #endif
121 lex['"'] = LEX_IS_STRINGQUOTE;
123 #if ! defined (TC_HPPA)
124 lex['\''] = LEX_IS_ONECHAR_QUOTE;
125 #endif
127 #ifdef SINGLE_QUOTE_STRINGS
128 lex['\''] = LEX_IS_STRINGQUOTE;
129 #endif
132 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
133 in state 5 of do_scrub_chars must be changed. */
135 /* Note that these override the previous defaults, e.g. if ';' is a
136 comment char, then it isn't a line separator. */
137 for (p = symbol_chars; *p; ++p)
138 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
140 for (c = 128; c < 256; ++c)
141 lex[c] = LEX_IS_SYMBOL_COMPONENT;
143 #ifdef tc_symbol_chars
144 /* This macro permits the processor to specify all characters which
145 may appears in an operand. This will prevent the scrubber from
146 discarding meaningful whitespace in certain cases. The i386
147 backend uses this to support prefixes, which can confuse the
148 scrubber as to whether it is parsing operands or opcodes. */
149 for (p = tc_symbol_chars; *p; ++p)
150 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
151 #endif
153 /* The m68k backend wants to be able to change comment_chars. */
154 #ifndef tc_comment_chars
155 #define tc_comment_chars comment_chars
156 #endif
157 for (p = tc_comment_chars; *p; p++)
158 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
160 for (p = line_comment_chars; *p; p++)
161 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
163 #ifndef tc_line_separator_chars
164 #define tc_line_separator_chars line_separator_chars
165 #endif
166 for (p = tc_line_separator_chars; *p; p++)
167 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
169 #ifdef tc_parallel_separator_chars
170 /* This macro permits the processor to specify all characters which
171 separate parallel insns on the same line. */
172 for (p = tc_parallel_separator_chars; *p; p++)
173 lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
174 #endif
176 /* Only allow slash-star comments if slash is not in use.
177 FIXME: This isn't right. We should always permit them. */
178 if (lex['/'] == 0)
179 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
181 #ifdef TC_M68K
182 if (m68k_mri)
184 lex['\''] = LEX_IS_STRINGQUOTE;
185 lex[';'] = LEX_IS_COMMENT_START;
186 lex['*'] = LEX_IS_LINE_COMMENT_START;
187 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
188 then it can't be used in an expression. */
189 lex['!'] = LEX_IS_LINE_COMMENT_START;
191 #endif
193 #ifdef TC_V850
194 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
195 #endif
196 #ifdef DOUBLEBAR_PARALLEL
197 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
198 #endif
199 #ifdef TC_D30V
200 /* Must do this is we want VLIW instruction with "->" or "<-". */
201 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
202 #endif
204 #ifdef H_TICK_HEX
205 if (enable_h_tick_hex)
207 lex['h'] = LEX_IS_H;
208 lex['H'] = LEX_IS_H;
210 #endif
/* Saved state of the scrubber.  do_scrub_chars may return at any point
   (output buffer full, or input exhausted), so everything it needs in
   order to resume lives in these file-scope variables.  */
static int state;			/* Current state-machine state.  */
static int old_state;			/* State to return to afterwards.  */
static const char *out_string;		/* Text being emitted in state -1.  */
static char out_buf[20];		/* Backing store for short out_string.  */
static int add_newlines;		/* Newlines still owed to the output.  */
static char *saved_input;		/* Unconsumed input, or NULL.  */
static size_t saved_input_len;		/* Byte count of saved_input.  */
static char input_buffer[32 * 1024];	/* Buffer handed to the GET callback.  */
static const char *mri_state;		/* Progress matching ".mri 0/1".  */
static char mri_last_ch;		/* Last character matched for .mri.  */

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.

   Each member mirrors the file-scope variable of the same name;
   saved_input is a private heap copy of the pending input rather than
   a pointer into input_buffer.  */

struct app_save
{
  int          state;
  int          old_state;
  const char * out_string;
  char         out_buf[sizeof (out_buf)];
  int          add_newlines;
  char *       saved_input;
  size_t       saved_input_len;
#ifdef TC_M68K
  int          scrub_m68k_mri;
#endif
  const char * mri_state;
  char         mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  const char * symver_state;
#endif
#ifdef TC_ARM
  char last_char;
#endif
};
252 char *
253 app_push (void)
255 struct app_save *saved;
257 saved = XNEW (struct app_save);
258 saved->state = state;
259 saved->old_state = old_state;
260 saved->out_string = out_string;
261 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
262 saved->add_newlines = add_newlines;
263 if (saved_input == NULL)
264 saved->saved_input = NULL;
265 else
267 saved->saved_input = XNEWVEC (char, saved_input_len);
268 memcpy (saved->saved_input, saved_input, saved_input_len);
269 saved->saved_input_len = saved_input_len;
271 #ifdef TC_M68K
272 saved->scrub_m68k_mri = scrub_m68k_mri;
273 #endif
274 saved->mri_state = mri_state;
275 saved->mri_last_ch = mri_last_ch;
276 #if defined TC_ARM && defined OBJ_ELF
277 saved->symver_state = symver_state;
278 #endif
279 #ifdef TC_ARM
280 saved->last_char = last_char;
281 #endif
283 /* do_scrub_begin() is not useful, just wastes time. */
285 state = 0;
286 saved_input = NULL;
287 add_newlines = 0;
289 return (char *) saved;
292 void
293 app_pop (char *arg)
295 struct app_save *saved = (struct app_save *) arg;
297 /* There is no do_scrub_end (). */
298 state = saved->state;
299 old_state = saved->old_state;
300 out_string = saved->out_string;
301 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
302 add_newlines = saved->add_newlines;
303 if (saved->saved_input == NULL)
304 saved_input = NULL;
305 else
307 gas_assert (saved->saved_input_len <= sizeof (input_buffer));
308 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
309 saved_input = input_buffer;
310 saved_input_len = saved->saved_input_len;
311 free (saved->saved_input);
313 #ifdef TC_M68K
314 scrub_m68k_mri = saved->scrub_m68k_mri;
315 #endif
316 mri_state = saved->mri_state;
317 mri_last_ch = saved->mri_last_ch;
318 #if defined TC_ARM && defined OBJ_ELF
319 symver_state = saved->symver_state;
320 #endif
321 #ifdef TC_ARM
322 last_char = saved->last_char;
323 #endif
325 free (arg);
/* Translate the character following a backslash in a one-character
   quote into the character it denotes.  CH is the character after the
   backslash; anything that is not a recognised escape letter is
   returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
355 /* This function is called to process input characters. The GET
356 parameter is used to retrieve more input characters. GET should
357 set its parameter to point to a buffer, and return the length of
358 the buffer; it should return 0 at end of file. The scrubbed output
359 characters are put into the buffer starting at TOSTART; the TOSTART
360 buffer is TOLEN bytes in length. The function returns the number
361 of scrubbed characters put into TOSTART. This will be TOLEN unless
362 end of file was seen. This function is arranged as a state
363 machine, and saves its state so that it may return at any point.
364 This is the way the old code used to work. */
366 size_t
367 do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
369 char *to = tostart;
370 char *toend = tostart + tolen;
371 char *from;
372 char *fromend;
373 size_t fromlen;
374 int ch, ch2 = 0;
375 /* Character that started the string we're working on. */
376 static char quotechar;
378 /*State 0: beginning of normal line
379 1: After first whitespace on line (flush more white)
380 2: After first non-white (opcode) on line (keep 1white)
381 3: after second white on line (into operands) (flush white)
382 4: after putting out a .linefile, put out digits
383 5: parsing a string, then go to old-state
384 6: putting out \ escape in a "d string.
385 7: no longer used
386 8: no longer used
387 9: After seeing symbol char in state 3 (keep 1white after symchar)
388 10: After seeing whitespace in state 9 (keep white before symchar)
389 11: After seeing a symbol character in state 0 (eg a label definition)
390 -1: output string in out_string and go to the state in old_state
391 -2: flush text until a '*' '/' is seen, then go to state old_state
392 #ifdef TC_V850
393 12: After seeing a dash, looking for a second dash as a start
394 of comment.
395 #endif
396 #ifdef DOUBLEBAR_PARALLEL
397 13: After seeing a vertical bar, looking for a second
398 vertical bar as a parallel expression separator.
399 #endif
400 #ifdef TC_PREDICATE_START_CHAR
401 14: After seeing a predicate start character at state 0, looking
402 for a predicate end character as predicate.
403 15: After seeing a predicate start character at state 1, looking
404 for a predicate end character as predicate.
405 #endif
406 #ifdef TC_Z80
407 16: After seeing an 'a' or an 'A' at the start of a symbol
408 17: After seeing an 'f' or an 'F' in state 16
409 #endif
412 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
413 constructs like ``.loc 1 20''. This was turning into ``.loc
414 120''. States 9 and 10 ensure that a space is never dropped in
415 between characters which could appear in an identifier. Ian
416 Taylor, ian@cygnus.com.
418 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
419 correctly on the PA (and any other target where colons are optional).
420 Jeff Law, law@cs.utah.edu.
422 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
423 get squashed into "cmp r1,r2||trap#1", with the all important space
424 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
426 /* This macro gets the next input character. */
428 #define GET() \
429 (from < fromend \
430 ? * (unsigned char *) (from++) \
431 : (saved_input = NULL, \
432 fromlen = (*get) (input_buffer, sizeof input_buffer), \
433 from = input_buffer, \
434 fromend = from + fromlen, \
435 (fromlen == 0 \
436 ? EOF \
437 : * (unsigned char *) (from++))))
439 /* This macro pushes a character back on the input stream. */
441 #define UNGET(uch) (*--from = (uch))
443 /* This macro puts a character into the output buffer. If this
444 character fills the output buffer, this macro jumps to the label
445 TOFULL. We use this rather ugly approach because we need to
446 handle two different termination conditions: EOF on the input
447 stream, and a full output buffer. It would be simpler if we
448 always read in the entire input stream before processing it, but
449 I don't want to make such a significant change to the assembler's
450 memory usage. */
452 #define PUT(pch) \
453 do \
455 *to++ = (pch); \
456 if (to >= toend) \
457 goto tofull; \
459 while (0)
461 if (saved_input != NULL)
463 from = saved_input;
464 fromend = from + saved_input_len;
466 else
468 fromlen = (*get) (input_buffer, sizeof input_buffer);
469 if (fromlen == 0)
470 return 0;
471 from = input_buffer;
472 fromend = from + fromlen;
475 while (1)
477 /* The cases in this switch end with continue, in order to
478 branch back to the top of this while loop and generate the
479 next output character in the appropriate state. */
480 switch (state)
482 case -1:
483 ch = *out_string++;
484 if (*out_string == '\0')
486 state = old_state;
487 old_state = 3;
489 PUT (ch);
490 continue;
492 case -2:
493 for (;;)
497 ch = GET ();
499 if (ch == EOF)
501 as_warn (_("end of file in comment"));
502 goto fromeof;
505 if (ch == '\n')
506 PUT ('\n');
508 while (ch != '*');
510 while ((ch = GET ()) == '*')
513 if (ch == EOF)
515 as_warn (_("end of file in comment"));
516 goto fromeof;
519 if (ch == '/')
520 break;
522 UNGET (ch);
525 state = old_state;
526 UNGET (' ');
527 continue;
529 case 4:
530 ch = GET ();
531 if (ch == EOF)
532 goto fromeof;
533 else if (ch >= '0' && ch <= '9')
534 PUT (ch);
535 else
537 while (ch != EOF && IS_WHITESPACE (ch))
538 ch = GET ();
539 if (ch == '"')
541 quotechar = ch;
542 state = 5;
543 old_state = 3;
544 PUT (ch);
546 else
548 while (ch != EOF && ch != '\n')
549 ch = GET ();
550 state = 0;
551 PUT (ch);
554 continue;
556 case 5:
557 /* We are going to copy everything up to a quote character,
558 with special handling for a backslash. We try to
559 optimize the copying in the simple case without using the
560 GET and PUT macros. */
562 char *s;
563 ptrdiff_t len;
565 for (s = from; s < fromend; s++)
567 ch = *s;
568 if (ch == '\\'
569 || ch == quotechar
570 || ch == '\n')
571 break;
573 len = s - from;
574 if (len > toend - to)
575 len = toend - to;
576 if (len > 0)
578 memcpy (to, from, len);
579 to += len;
580 from += len;
581 if (to >= toend)
582 goto tofull;
586 ch = GET ();
587 if (ch == EOF)
589 /* This buffer is here specifically so
590 that the UNGET below will work. */
591 static char one_char_buf[1];
593 as_warn (_("end of file in string; '%c' inserted"), quotechar);
594 state = old_state;
595 from = fromend = one_char_buf + 1;
596 fromlen = 1;
597 UNGET ('\n');
598 PUT (quotechar);
600 else if (ch == quotechar)
602 state = old_state;
603 PUT (ch);
605 #ifndef NO_STRING_ESCAPES
606 else if (ch == '\\')
608 state = 6;
609 PUT (ch);
611 #endif
612 else if (scrub_m68k_mri && ch == '\n')
614 /* Just quietly terminate the string. This permits lines like
615 bne label loop if we haven't reach end yet. */
616 state = old_state;
617 UNGET (ch);
618 PUT ('\'');
620 else
622 PUT (ch);
624 continue;
626 case 6:
627 state = 5;
628 ch = GET ();
629 switch (ch)
631 /* Handle strings broken across lines, by turning '\n' into
632 '\\' and 'n'. */
633 case '\n':
634 UNGET ('n');
635 add_newlines++;
636 PUT ('\\');
637 continue;
639 case EOF:
640 as_warn (_("end of file in string; '%c' inserted"), quotechar);
641 PUT (quotechar);
642 continue;
644 case '"':
645 case '\\':
646 case 'b':
647 case 'f':
648 case 'n':
649 case 'r':
650 case 't':
651 case 'v':
652 case 'x':
653 case 'X':
654 case '0':
655 case '1':
656 case '2':
657 case '3':
658 case '4':
659 case '5':
660 case '6':
661 case '7':
662 break;
664 default:
665 #ifdef ONLY_STANDARD_ESCAPES
666 as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
667 #endif
668 break;
670 PUT (ch);
671 continue;
673 #ifdef DOUBLEBAR_PARALLEL
674 case 13:
675 ch = GET ();
676 if (ch != '|')
677 abort ();
679 /* Reset back to state 1 and pretend that we are parsing a
680 line from just after the first white space. */
681 state = 1;
682 PUT ('|');
683 #ifdef TC_TIC6X
684 /* "||^" is used for SPMASKed instructions. */
685 ch = GET ();
686 if (ch == EOF)
687 goto fromeof;
688 else if (ch == '^')
689 PUT ('^');
690 else
691 UNGET (ch);
692 #endif
693 continue;
694 #endif
695 #ifdef TC_Z80
696 case 16:
697 /* We have seen an 'a' at the start of a symbol, look for an 'f'. */
698 ch = GET ();
699 if (ch == 'f' || ch == 'F')
701 state = 17;
702 PUT (ch);
704 else
706 state = 9;
707 break;
709 /* Fall through. */
710 case 17:
711 /* We have seen "af" at the start of a symbol,
712 a ' here is a part of that symbol. */
713 ch = GET ();
714 state = 9;
715 if (ch == '\'')
716 /* Change to avoid warning about unclosed string. */
717 PUT ('`');
718 else if (ch != EOF)
719 UNGET (ch);
720 break;
721 #endif
724 /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
726 /* flushchar: */
727 ch = GET ();
729 #ifdef TC_PREDICATE_START_CHAR
730 if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
732 state += 14;
733 PUT (ch);
734 continue;
736 else if (state == 14 || state == 15)
738 if (ch == TC_PREDICATE_END_CHAR)
740 state -= 14;
741 PUT (ch);
742 ch = GET ();
744 else
746 PUT (ch);
747 continue;
750 #endif
752 recycle:
754 #if defined TC_ARM && defined OBJ_ELF
755 /* We need to watch out for .symver directives. See the comment later
756 in this function. */
757 if (symver_state == NULL)
759 if ((state == 0 || state == 1) && ch == symver_pseudo[0])
760 symver_state = symver_pseudo + 1;
762 else
764 /* We advance to the next state if we find the right
765 character. */
766 if (ch != '\0' && (*symver_state == ch))
767 ++symver_state;
768 else if (*symver_state != '\0')
769 /* We did not get the expected character, or we didn't
770 get a valid terminating character after seeing the
771 entire pseudo-op, so we must go back to the beginning. */
772 symver_state = NULL;
773 else
775 /* We've read the entire pseudo-op. If this is the end
776 of the line, go back to the beginning. */
777 if (IS_NEWLINE (ch))
778 symver_state = NULL;
781 #endif /* TC_ARM && OBJ_ELF */
783 #ifdef TC_M68K
784 /* We want to have pseudo-ops which control whether we are in
785 MRI mode or not. Unfortunately, since m68k MRI mode affects
786 the scrubber, that means that we need a special purpose
787 recognizer here. */
788 if (mri_state == NULL)
790 if ((state == 0 || state == 1)
791 && ch == mri_pseudo[0])
792 mri_state = mri_pseudo + 1;
794 else
796 /* We advance to the next state if we find the right
797 character, or if we need a space character and we get any
798 whitespace character, or if we need a '0' and we get a
799 '1' (this is so that we only need one state to handle
800 ``.mri 0'' and ``.mri 1''). */
801 if (ch != '\0'
802 && (*mri_state == ch
803 || (*mri_state == ' '
804 && lex[ch] == LEX_IS_WHITESPACE)
805 || (*mri_state == '0'
806 && ch == '1')))
808 mri_last_ch = ch;
809 ++mri_state;
811 else if (*mri_state != '\0'
812 || (lex[ch] != LEX_IS_WHITESPACE
813 && lex[ch] != LEX_IS_NEWLINE))
815 /* We did not get the expected character, or we didn't
816 get a valid terminating character after seeing the
817 entire pseudo-op, so we must go back to the
818 beginning. */
819 mri_state = NULL;
821 else
823 /* We've read the entire pseudo-op. mri_last_ch is
824 either '0' or '1' indicating whether to enter or
825 leave MRI mode. */
826 do_scrub_begin (mri_last_ch == '1');
827 mri_state = NULL;
829 /* We continue handling the character as usual. The
830 main gas reader must also handle the .mri pseudo-op
831 to control expression parsing and the like. */
834 #endif
836 if (ch == EOF)
838 if (state != 0)
840 as_warn (_("end of file not at end of a line; newline inserted"));
841 state = 0;
842 PUT ('\n');
844 goto fromeof;
847 switch (lex[ch])
849 case LEX_IS_WHITESPACE:
852 ch = GET ();
854 while (ch != EOF && IS_WHITESPACE (ch));
855 if (ch == EOF)
856 goto fromeof;
858 if (state == 0)
860 /* Preserve a single whitespace character at the
861 beginning of a line. */
862 state = 1;
863 UNGET (ch);
864 PUT (' ');
865 break;
868 #ifdef KEEP_WHITE_AROUND_COLON
869 if (lex[ch] == LEX_IS_COLON)
871 /* Only keep this white if there's no white *after* the
872 colon. */
873 ch2 = GET ();
874 if (ch2 != EOF)
875 UNGET (ch2);
876 if (!IS_WHITESPACE (ch2))
878 state = 9;
879 UNGET (ch);
880 PUT (' ');
881 break;
884 #endif
885 if (IS_COMMENT (ch)
886 || ch == '/'
887 || IS_LINE_SEPARATOR (ch)
888 || IS_PARALLEL_SEPARATOR (ch))
890 if (scrub_m68k_mri)
892 /* In MRI mode, we keep these spaces. */
893 UNGET (ch);
894 PUT (' ');
895 break;
897 goto recycle;
900 /* If we're in state 2 or 11, we've seen a non-white
901 character followed by whitespace. If the next character
902 is ':', this is whitespace after a label name which we
903 normally must ignore. In MRI mode, though, spaces are
904 not permitted between the label and the colon. */
905 if ((state == 2 || state == 11)
906 && lex[ch] == LEX_IS_COLON
907 && ! scrub_m68k_mri)
909 state = 1;
910 PUT (ch);
911 break;
914 switch (state)
916 case 1:
917 /* We can arrive here if we leave a leading whitespace
918 character at the beginning of a line. */
919 goto recycle;
920 case 2:
921 state = 3;
922 if (to + 1 < toend)
924 /* Optimize common case by skipping UNGET/GET. */
925 PUT (' '); /* Sp after opco */
926 goto recycle;
928 UNGET (ch);
929 PUT (' ');
930 break;
931 case 3:
932 #ifndef TC_KEEP_OPERAND_SPACES
933 /* For TI C6X, we keep these spaces as they may separate
934 functional unit specifiers from operands. */
935 if (scrub_m68k_mri)
936 #endif
938 /* In MRI mode, we keep these spaces. */
939 UNGET (ch);
940 PUT (' ');
941 break;
943 goto recycle; /* Sp in operands */
944 case 9:
945 case 10:
946 #ifndef TC_KEEP_OPERAND_SPACES
947 if (scrub_m68k_mri)
948 #endif
950 /* In MRI mode, we keep these spaces. */
951 state = 3;
952 UNGET (ch);
953 PUT (' ');
954 break;
956 state = 10; /* Sp after symbol char */
957 goto recycle;
958 case 11:
959 if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
960 state = 1;
961 else
963 /* We know that ch is not ':', since we tested that
964 case above. Therefore this is not a label, so it
965 must be the opcode, and we've just seen the
966 whitespace after it. */
967 state = 3;
969 UNGET (ch);
970 PUT (' '); /* Sp after label definition. */
971 break;
972 default:
973 BAD_CASE (state);
975 break;
977 case LEX_IS_TWOCHAR_COMMENT_1ST:
978 ch2 = GET ();
979 if (ch2 == '*')
981 for (;;)
985 ch2 = GET ();
986 if (ch2 != EOF && IS_NEWLINE (ch2))
987 add_newlines++;
989 while (ch2 != EOF && ch2 != '*');
991 while (ch2 == '*')
992 ch2 = GET ();
994 if (ch2 == EOF || ch2 == '/')
995 break;
997 /* This UNGET will ensure that we count newlines
998 correctly. */
999 UNGET (ch2);
1002 if (ch2 == EOF)
1003 as_warn (_("end of file in multiline comment"));
1005 ch = ' ';
1006 goto recycle;
1008 #ifdef DOUBLESLASH_LINE_COMMENTS
1009 else if (ch2 == '/')
1013 ch = GET ();
1015 while (ch != EOF && !IS_NEWLINE (ch));
1016 if (ch == EOF)
1017 as_warn ("end of file in comment; newline inserted");
1018 state = 0;
1019 PUT ('\n');
1020 break;
1022 #endif
1023 else
1025 if (ch2 != EOF)
1026 UNGET (ch2);
1027 if (state == 9 || state == 10)
1028 state = 3;
1029 PUT (ch);
1031 break;
1033 case LEX_IS_STRINGQUOTE:
1034 quotechar = ch;
1035 if (state == 10)
1037 /* Preserve the whitespace in foo "bar". */
1038 UNGET (ch);
1039 state = 3;
1040 PUT (' ');
1042 /* PUT didn't jump out. We could just break, but we
1043 know what will happen, so optimize a bit. */
1044 ch = GET ();
1045 old_state = 3;
1047 else if (state == 9)
1048 old_state = 3;
1049 else
1050 old_state = state;
1051 state = 5;
1052 PUT (ch);
1053 break;
1055 case LEX_IS_ONECHAR_QUOTE:
1056 #ifdef H_TICK_HEX
1057 if (state == 9 && enable_h_tick_hex)
1059 char c;
1061 c = GET ();
1062 as_warn ("'%c found after symbol", c);
1063 UNGET (c);
1065 #endif
1066 if (state == 10)
1068 /* Preserve the whitespace in foo 'b'. */
1069 UNGET (ch);
1070 state = 3;
1071 PUT (' ');
1072 break;
1074 ch = GET ();
1075 if (ch == EOF)
1077 as_warn (_("end of file after a one-character quote; \\0 inserted"));
1078 ch = 0;
1080 if (ch == '\\')
1082 ch = GET ();
1083 if (ch == EOF)
1085 as_warn (_("end of file in escape character"));
1086 ch = '\\';
1088 else
1089 ch = process_escape (ch);
1091 sprintf (out_buf, "%d", (int) (unsigned char) ch);
1093 /* None of these 'x constants for us. We want 'x'. */
1094 if ((ch = GET ()) != '\'')
1096 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1097 as_warn (_("missing close quote; (assumed)"));
1098 #else
1099 if (ch != EOF)
1100 UNGET (ch);
1101 #endif
1103 if (strlen (out_buf) == 1)
1105 PUT (out_buf[0]);
1106 break;
1108 if (state == 9)
1109 old_state = 3;
1110 else
1111 old_state = state;
1112 state = -1;
1113 out_string = out_buf;
1114 PUT (*out_string++);
1115 break;
1117 case LEX_IS_COLON:
1118 #ifdef KEEP_WHITE_AROUND_COLON
1119 state = 9;
1120 #else
1121 if (state == 9 || state == 10)
1122 state = 3;
1123 else if (state != 3)
1124 state = 1;
1125 #endif
1126 PUT (ch);
1127 break;
1129 case LEX_IS_NEWLINE:
1130 /* Roll out a bunch of newlines from inside comments, etc. */
1131 if (add_newlines)
1133 --add_newlines;
1134 UNGET (ch);
1136 /* Fall through. */
1138 case LEX_IS_LINE_SEPARATOR:
1139 state = 0;
1140 PUT (ch);
1141 break;
1143 case LEX_IS_PARALLEL_SEPARATOR:
1144 state = 1;
1145 PUT (ch);
1146 break;
1148 #ifdef TC_V850
1149 case LEX_IS_DOUBLEDASH_1ST:
1150 ch2 = GET ();
1151 if (ch2 != '-')
1153 if (ch2 != EOF)
1154 UNGET (ch2);
1155 goto de_fault;
1157 /* Read and skip to end of line. */
1160 ch = GET ();
1162 while (ch != EOF && ch != '\n');
1164 if (ch == EOF)
1165 as_warn (_("end of file in comment; newline inserted"));
1167 state = 0;
1168 PUT ('\n');
1169 break;
1170 #endif
1171 #ifdef DOUBLEBAR_PARALLEL
1172 case LEX_IS_DOUBLEBAR_1ST:
1173 ch2 = GET ();
1174 if (ch2 != EOF)
1175 UNGET (ch2);
1176 if (ch2 != '|')
1177 goto de_fault;
1179 /* Handle '||' in two states as invoking PUT twice might
1180 result in the first one jumping out of this loop. We'd
1181 then lose track of the state and one '|' char. */
1182 state = 13;
1183 PUT ('|');
1184 break;
1185 #endif
1186 case LEX_IS_LINE_COMMENT_START:
1187 /* FIXME-someday: The two character comment stuff was badly
1188 thought out. On i386, we want '/' as line comment start
1189 AND we want C style comments. hence this hack. The
1190 whole lexical process should be reworked. xoxorich. */
1191 if (ch == '/')
1193 ch2 = GET ();
1194 if (ch2 == '*')
1196 old_state = 3;
1197 state = -2;
1198 break;
1200 else if (ch2 != EOF)
1202 UNGET (ch2);
1206 if (state == 0 || state == 1) /* Only comment at start of line. */
1208 int startch;
1210 startch = ch;
1214 ch = GET ();
1216 while (ch != EOF && IS_WHITESPACE (ch));
1218 if (ch == EOF)
1220 as_warn (_("end of file in comment; newline inserted"));
1221 PUT ('\n');
1222 break;
1225 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1227 /* Not a cpp line. */
1228 while (ch != EOF && !IS_NEWLINE (ch))
1229 ch = GET ();
1230 if (ch == EOF)
1232 as_warn (_("end of file in comment; newline inserted"));
1233 PUT ('\n');
1235 else /* IS_NEWLINE (ch) */
1237 /* To process non-zero add_newlines. */
1238 UNGET (ch);
1240 state = 0;
1241 break;
1243 /* Looks like `# 123 "filename"' from cpp. */
1244 UNGET (ch);
1245 old_state = 4;
1246 state = -1;
1247 if (scrub_m68k_mri)
1248 out_string = "\tlinefile ";
1249 else
1250 out_string = "\t.linefile ";
1251 PUT (*out_string++);
1252 break;
1255 #ifdef TC_D10V
1256 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1257 Trap is the only short insn that has a first operand that is
1258 neither register nor label.
1259 We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1 .
1260 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1261 already LEX_IS_LINE_COMMENT_START. However, it is the
1262 only character in line_comment_chars for d10v, hence we
1263 can recognize it as such. */
1264 /* An alternative approach would be to reset the state to 1 when
1265 we see '||', '<-' or '->', but that seems to be overkill. */
1266 if (state == 10)
1267 PUT (' ');
1268 #endif
1269 /* We have a line comment character which is not at the
1270 start of a line. If this is also a normal comment
1271 character, fall through. Otherwise treat it as a default
1272 character. */
1273 if (strchr (tc_comment_chars, ch) == NULL
1274 && (! scrub_m68k_mri
1275 || (ch != '!' && ch != '*')))
1276 goto de_fault;
1277 if (scrub_m68k_mri
1278 && (ch == '!' || ch == '*' || ch == '#')
1279 && state != 1
1280 && state != 10)
1281 goto de_fault;
1282 /* Fall through. */
1283 case LEX_IS_COMMENT_START:
1284 #if defined TC_ARM && defined OBJ_ELF
1285 /* On the ARM, `@' is the comment character.
1286 Unfortunately this is also a special character in ELF .symver
1287 directives (and .type, though we deal with those another way).
1288 So we check if this line is such a directive, and treat
1289 the character as default if so. This is a hack. */
1290 if ((symver_state != NULL) && (*symver_state == 0))
1291 goto de_fault;
1292 #endif
1294 #ifdef TC_ARM
1295 /* For the ARM, care is needed not to damage occurrences of \@
1296 by stripping the @ onwards. Yuck. */
1297 if ((to > tostart ? to[-1] : last_char) == '\\')
1298 /* Do not treat the @ as a start-of-comment. */
1299 goto de_fault;
1300 #endif
1302 #ifdef WARN_COMMENTS
1303 if (!found_comment)
1304 found_comment_file = as_where (&found_comment);
1305 #endif
1308 ch = GET ();
1310 while (ch != EOF && !IS_NEWLINE (ch));
1311 if (ch == EOF)
1312 as_warn (_("end of file in comment; newline inserted"));
1313 state = 0;
1314 PUT ('\n');
1315 break;
1317 #ifdef H_TICK_HEX
1318 case LEX_IS_H:
1319 /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1320 the H' with 0x to make them gas-style hex characters. */
1321 if (enable_h_tick_hex)
1323 char quot;
1325 quot = GET ();
1326 if (quot == '\'')
1328 UNGET ('x');
1329 ch = '0';
1331 else
1332 UNGET (quot);
1334 #endif
1335 /* Fall through. */
1337 case LEX_IS_SYMBOL_COMPONENT:
1338 if (state == 10)
1340 /* This is a symbol character following another symbol
1341 character, with whitespace in between. We skipped
1342 the whitespace earlier, so output it now. */
1343 UNGET (ch);
1344 state = 3;
1345 PUT (' ');
1346 break;
1349 #ifdef TC_Z80
1350 /* "af'" is a symbol containing '\''. */
1351 if (state == 3 && (ch == 'a' || ch == 'A'))
1353 state = 16;
1354 PUT (ch);
1355 ch = GET ();
1356 if (ch == 'f' || ch == 'F')
1358 state = 17;
1359 PUT (ch);
1360 break;
1362 else
1364 state = 9;
1365 if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1367 if (ch != EOF)
1368 UNGET (ch);
1369 break;
1373 #endif
1374 if (state == 3)
1375 state = 9;
1377 /* This is a common case. Quickly copy CH and all the
1378 following symbol component or normal characters. */
1379 if (to + 1 < toend
1380 && mri_state == NULL
1381 #if defined TC_ARM && defined OBJ_ELF
1382 && symver_state == NULL
1383 #endif
1386 char *s;
1387 ptrdiff_t len;
1389 for (s = from; s < fromend; s++)
1391 int type;
1393 ch2 = *(unsigned char *) s;
1394 type = lex[ch2];
1395 if (type != 0
1396 && type != LEX_IS_SYMBOL_COMPONENT)
1397 break;
1400 if (s > from)
1401 /* Handle the last character normally, for
1402 simplicity. */
1403 --s;
1405 len = s - from;
1407 if (len > (toend - to) - 1)
1408 len = (toend - to) - 1;
1410 if (len > 0)
1412 PUT (ch);
1413 memcpy (to, from, len);
1414 to += len;
1415 from += len;
1416 if (to >= toend)
1417 goto tofull;
1418 ch = GET ();
1422 /* Fall through. */
1423 default:
1424 de_fault:
1425 /* Some relatively `normal' character. */
1426 if (state == 0)
1428 state = 11; /* Now seeing label definition. */
1430 else if (state == 1)
1432 state = 2; /* Ditto. */
1434 else if (state == 9)
1436 if (!IS_SYMBOL_COMPONENT (ch))
1437 state = 3;
1439 else if (state == 10)
1441 if (ch == '\\')
1443 /* Special handling for backslash: a backslash may
1444 be the beginning of a formal parameter (of a
1445 macro) following another symbol character, with
1446 whitespace in between. If that is the case, we
1447 output a space before the parameter. Strictly
1448 speaking, correct handling depends upon what the
1449 macro parameter expands into; if the parameter
1450 expands into something which does not start with
1451 an operand character, then we don't want to keep
1452 the space. We don't have enough information to
1453 make the right choice, so here we are making the
1454 choice which is more likely to be correct. */
1455 if (to + 1 >= toend)
1457 /* If we're near the end of the buffer, save the
1458 character for the next time round. Otherwise
1459 we'll lose our state. */
1460 UNGET (ch);
1461 goto tofull;
1463 *to++ = ' ';
1466 state = 3;
1468 PUT (ch);
1469 break;
1473 /*NOTREACHED*/
1475 fromeof:
1476 /* We have reached the end of the input. */
1477 #ifdef TC_ARM
1478 if (to > tostart)
1479 last_char = to[-1];
1480 #endif
1481 return to - tostart;
1483 tofull:
1484 /* The output buffer is full. Save any input we have not yet
1485 processed. */
1486 if (fromend > from)
1488 saved_input = from;
1489 saved_input_len = fromend - from;
1491 else
1492 saved_input = NULL;
1494 #ifdef TC_ARM
1495 if (to > tostart)
1496 last_char = to[-1];
1497 #endif
1498 return to - tostart;