ld: Move the .note.build-id section to near the start of the memory map.
[binutils-gdb.git] / gas / app.c
blob8dc69ff4ce04e61866104f0716a232126318ec4c
1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987-2024 Free Software Foundation, Inc.
4 This file is part of GAS, the GNU Assembler.
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GAS is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
22 /* App, the assembler pre-processor. This pre-processor strips out
23 excess spaces, turns single-quoted characters into a decimal
24 constant, and turns the # in # <number> <filename> <garbage> into a
25 .linefile. This needs better error-handling. */
27 #include "as.h"
/* Compilers that do not claim ISO C conformance may lack the `const'
   keyword; make it expand to nothing there unless someone already did.  */
#if __STDC__ != 1
# if !defined (const)
#  define const /* empty */
# endif
#endif

#if defined (H_TICK_HEX)
/* When nonzero, do_scrub_begin gives 'h' and 'H' the LEX_IS_H class.  */
int enable_h_tick_hex = 0;
#endif

#if !defined (TC_M68K)
/* No m68k support: MRI scrubbing is a compile-time constant "off".  */
#define scrub_m68k_mri 0
#else
/* Whether we are scrubbing in m68k MRI mode.  This is kept separate
   from flag_m68k_mri because the .mri pseudo-op changes the two flags
   at different times.  */
static int scrub_m68k_mri;

/* Recogniser for the pseudo-op which switches in and out of MRI mode.
   See the comment in do_scrub_chars.  mri_state walks along mri_pseudo
   as characters match; mri_last_ch remembers the last matched one.  */
static const char mri_pseudo[] = ".mri 0";
static const char *mri_state;
static char mri_last_ch;
#endif

#if defined (TC_ARM) && defined (OBJ_ELF)
/* Recogniser for the pseudo-op in which `@' characters need special
   treatment.  See the comment in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";
static const char *symver_state;
#endif

/* Recogniser for the pseudo-op (spelled without its leading dot) at
   which processing should stop, possibly only temporarily.  See the
   comments in do_scrub_chars().  */
static const char end_pseudo[] = "end ";
static const char *end_state;

/* Whether NO_PSEUDO_DOT was active when assembly started.  */
static bool no_pseudo_dot;

static char last_char;
/* Lexical classes stored in lex[].  An entry of zero means the
   character belongs to none of these classes.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14
#ifdef H_TICK_HEX
#define LEX_IS_H			15
#endif

/* Character classification table, indexed by unsigned character value.
   These are the compile-time defaults; do_scrub_begin overrides
   individual entries according to the target's comment, separator and
   quote characters.  */
static char lex[256] = {
  [' ']  = LEX_IS_WHITESPACE,
  ['\t'] = LEX_IS_WHITESPACE,
#ifdef CR_EOL
  ['\r'] = LEX_IS_LINE_SEPARATOR,
#else
  ['\r'] = LEX_IS_WHITESPACE,
#endif
  ['\n'] = LEX_IS_NEWLINE,
  [':'] = LEX_IS_COLON,
  ['$'] = LEX_IS_SYMBOL_COMPONENT,
  ['.'] = LEX_IS_SYMBOL_COMPONENT,
  ['_'] = LEX_IS_SYMBOL_COMPONENT,
  ['A'] = LEX_IS_SYMBOL_COMPONENT, ['a'] = LEX_IS_SYMBOL_COMPONENT,
  ['B'] = LEX_IS_SYMBOL_COMPONENT, ['b'] = LEX_IS_SYMBOL_COMPONENT,
  ['C'] = LEX_IS_SYMBOL_COMPONENT, ['c'] = LEX_IS_SYMBOL_COMPONENT,
  ['D'] = LEX_IS_SYMBOL_COMPONENT, ['d'] = LEX_IS_SYMBOL_COMPONENT,
  ['E'] = LEX_IS_SYMBOL_COMPONENT, ['e'] = LEX_IS_SYMBOL_COMPONENT,
  ['F'] = LEX_IS_SYMBOL_COMPONENT, ['f'] = LEX_IS_SYMBOL_COMPONENT,
  ['G'] = LEX_IS_SYMBOL_COMPONENT, ['g'] = LEX_IS_SYMBOL_COMPONENT,
  ['H'] = LEX_IS_SYMBOL_COMPONENT, ['h'] = LEX_IS_SYMBOL_COMPONENT,
  ['I'] = LEX_IS_SYMBOL_COMPONENT, ['i'] = LEX_IS_SYMBOL_COMPONENT,
  ['J'] = LEX_IS_SYMBOL_COMPONENT, ['j'] = LEX_IS_SYMBOL_COMPONENT,
  ['K'] = LEX_IS_SYMBOL_COMPONENT, ['k'] = LEX_IS_SYMBOL_COMPONENT,
  ['L'] = LEX_IS_SYMBOL_COMPONENT, ['l'] = LEX_IS_SYMBOL_COMPONENT,
  ['M'] = LEX_IS_SYMBOL_COMPONENT, ['m'] = LEX_IS_SYMBOL_COMPONENT,
  ['N'] = LEX_IS_SYMBOL_COMPONENT, ['n'] = LEX_IS_SYMBOL_COMPONENT,
  ['O'] = LEX_IS_SYMBOL_COMPONENT, ['o'] = LEX_IS_SYMBOL_COMPONENT,
  ['P'] = LEX_IS_SYMBOL_COMPONENT, ['p'] = LEX_IS_SYMBOL_COMPONENT,
  ['Q'] = LEX_IS_SYMBOL_COMPONENT, ['q'] = LEX_IS_SYMBOL_COMPONENT,
  ['R'] = LEX_IS_SYMBOL_COMPONENT, ['r'] = LEX_IS_SYMBOL_COMPONENT,
  ['S'] = LEX_IS_SYMBOL_COMPONENT, ['s'] = LEX_IS_SYMBOL_COMPONENT,
  ['T'] = LEX_IS_SYMBOL_COMPONENT, ['t'] = LEX_IS_SYMBOL_COMPONENT,
  ['U'] = LEX_IS_SYMBOL_COMPONENT, ['u'] = LEX_IS_SYMBOL_COMPONENT,
  ['V'] = LEX_IS_SYMBOL_COMPONENT, ['v'] = LEX_IS_SYMBOL_COMPONENT,
  ['W'] = LEX_IS_SYMBOL_COMPONENT, ['w'] = LEX_IS_SYMBOL_COMPONENT,
  ['X'] = LEX_IS_SYMBOL_COMPONENT, ['x'] = LEX_IS_SYMBOL_COMPONENT,
  ['Y'] = LEX_IS_SYMBOL_COMPONENT, ['y'] = LEX_IS_SYMBOL_COMPONENT,
  ['Z'] = LEX_IS_SYMBOL_COMPONENT, ['z'] = LEX_IS_SYMBOL_COMPONENT,
  ['0'] = LEX_IS_SYMBOL_COMPONENT,
  ['1'] = LEX_IS_SYMBOL_COMPONENT,
  ['2'] = LEX_IS_SYMBOL_COMPONENT,
  ['3'] = LEX_IS_SYMBOL_COMPONENT,
  ['4'] = LEX_IS_SYMBOL_COMPONENT,
  ['5'] = LEX_IS_SYMBOL_COMPONENT,
  ['6'] = LEX_IS_SYMBOL_COMPONENT,
  ['7'] = LEX_IS_SYMBOL_COMPONENT,
  ['8'] = LEX_IS_SYMBOL_COMPONENT,
  ['9'] = LEX_IS_SYMBOL_COMPONENT,
  /* Every byte value from 128 through 255 is a symbol component.  The
     INITn helpers expand to n consecutive designated initializers.  */
#define INIT2(n)      [n] = LEX_IS_SYMBOL_COMPONENT, \
		[(n) + 1] = LEX_IS_SYMBOL_COMPONENT
#define INIT4(n)   INIT2 (n),  INIT2 ((n) +  2)
#define INIT8(n)   INIT4 (n),  INIT4 ((n) +  4)
#define INIT16(n)  INIT8 (n),  INIT8 ((n) +  8)
#define INIT32(n)  INIT16 (n), INIT16 ((n) + 16)
#define INIT64(n)  INIT32 (n), INIT32 ((n) + 32)
#define INIT128(n) INIT64 (n), INIT64 ((n) + 64)
  INIT128 (128),
#undef INIT128
#undef INIT64
#undef INIT32
#undef INIT16
#undef INIT8
#undef INIT4
#undef INIT2
};

/* Return the lexical class of C, or zero (no class) when C is not a
   valid index into lex[].  The scrubber classifies values that may be
   EOF (for instance right after seeing ".end" at end of input), and
   indexing lex[] with EOF would read outside the table.  */
static inline int
lex_class (int c)
{
  return (c >= 0 && c < (int) sizeof (lex)) ? lex[c] : 0;
}

/* Classification predicates.  All of them are false for EOF.  */
#define IS_SYMBOL_COMPONENT(c)		(lex_class (c) == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex_class (c) == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex_class (c) == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)	(lex_class (c) == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)			(lex_class (c) == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex_class (c) == LEX_IS_LINE_COMMENT_START)
#define	IS_TWOCHAR_COMMENT_1ST(c)	(lex_class (c) == LEX_IS_TWOCHAR_COMMENT_1ST)
#define	IS_NEWLINE(c)			(lex_class (c) == LEX_IS_NEWLINE)
168 void
169 do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
171 const char *p;
173 /* Latch this once at start. xtensa uses a hook function, yet context isn't
174 meaningful for scrubbing (or else we'd need to sync scrubber behavior as
175 state changes). */
176 if (lex['/'] == 0)
177 no_pseudo_dot = NO_PSEUDO_DOT;
179 #ifdef TC_M68K
180 scrub_m68k_mri = m68k_mri;
182 if (! m68k_mri)
183 #endif
185 lex['"'] = LEX_IS_STRINGQUOTE;
187 #if ! defined (TC_HPPA)
188 lex['\''] = LEX_IS_ONECHAR_QUOTE;
189 #endif
191 #ifdef SINGLE_QUOTE_STRINGS
192 lex['\''] = LEX_IS_STRINGQUOTE;
193 #endif
196 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
197 in state 5 of do_scrub_chars must be changed. */
199 /* Note that these override the previous defaults, e.g. if ';' is a
200 comment char, then it isn't a line separator. */
202 #ifdef tc_symbol_chars
203 /* This macro permits the processor to specify all characters which
204 may appears in an operand. This will prevent the scrubber from
205 discarding meaningful whitespace in certain cases. The i386
206 backend uses this to support prefixes, which can confuse the
207 scrubber as to whether it is parsing operands or opcodes. */
208 for (p = tc_symbol_chars; *p; ++p)
209 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
210 #endif
212 /* The m68k backend wants to be able to change comment_chars. */
213 #ifndef tc_comment_chars
214 #define tc_comment_chars comment_chars
215 #endif
216 for (p = tc_comment_chars; *p; p++)
217 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
219 /* While counter intuitive to have more special purpose line comment chars
220 override more general purpose ordinary ones, logic in do_scrub_chars()
221 depends on this ordering. */
222 for (p = line_comment_chars; *p; p++)
223 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
225 #ifndef tc_line_separator_chars
226 #define tc_line_separator_chars line_separator_chars
227 #endif
228 for (p = tc_line_separator_chars; *p; p++)
229 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
231 #ifdef tc_parallel_separator_chars
232 /* This macro permits the processor to specify all characters which
233 separate parallel insns on the same line. */
234 for (p = tc_parallel_separator_chars; *p; p++)
235 lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
236 #endif
238 /* Only allow slash-star comments if slash is not in use. Certain
239 other cases are dealt with in LEX_IS_LINE_COMMENT_START handling.
240 FIXME: This isn't right. We should always permit them. */
241 if (lex['/'] == 0)
242 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
244 #ifdef TC_M68K
245 if (m68k_mri)
247 lex['\''] = LEX_IS_STRINGQUOTE;
248 lex[';'] = LEX_IS_COMMENT_START;
249 lex['*'] = LEX_IS_LINE_COMMENT_START;
250 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
251 then it can't be used in an expression. */
252 lex['!'] = LEX_IS_LINE_COMMENT_START;
254 #endif
256 #ifdef TC_V850
257 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
258 #endif
259 #ifdef DOUBLEBAR_PARALLEL
260 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
261 #endif
262 #ifdef TC_D30V
263 /* Must do this is we want VLIW instruction with "->" or "<-". */
264 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
265 #endif
267 #ifdef H_TICK_HEX
268 if (enable_h_tick_hex)
270 lex['h'] = LEX_IS_H;
271 lex['H'] = LEX_IS_H;
273 #endif
/* Saved state of the scrubber.  do_scrub_chars may return at any point,
   so everything it needs in order to resume lives at file scope.  */

/* Current state of the do_scrub_chars state machine.  */
static int state;
/* State to go back to once a string, or out_string, has been emitted.  */
static int old_state;
/* Text still to be output while in state -1.  */
static const char *out_string;
/* Holds the decimal text generated for a one-character constant.  */
static char out_buf[20];
/* Number of newlines swallowed so far which are still to be emitted.  */
static int add_newlines;
/* Input left unconsumed by a previous call, or NULL if there is none,
   together with its length.  */
static char *saved_input;
static size_t saved_input_len;
/* Buffer handed to the GET callback to be filled with raw input.  */
static char input_buffer[32 * 1024];

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

struct app_save
{
  int state;
  int old_state;
  const char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  /* Heap copy of the pending input, or NULL; saved_input_len is only
     meaningful when saved_input is not NULL.  */
  char *saved_input;
  size_t saved_input_len;
  const char *end_state;
#ifdef TC_M68K
  int scrub_m68k_mri;
  const char *mri_state;
  char mri_last_ch;
#endif
#if defined TC_ARM && defined OBJ_ELF
  const char *symver_state;
#endif
  char last_char;
};
312 char *
313 app_push (void)
315 struct app_save *saved;
317 saved = XNEW (struct app_save);
318 saved->state = state;
319 saved->old_state = old_state;
320 saved->out_string = out_string;
321 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
322 saved->add_newlines = add_newlines;
323 if (saved_input == NULL)
324 saved->saved_input = NULL;
325 else
327 saved->saved_input = XNEWVEC (char, saved_input_len);
328 memcpy (saved->saved_input, saved_input, saved_input_len);
329 saved->saved_input_len = saved_input_len;
331 saved->end_state = end_state;
332 #ifdef TC_M68K
333 saved->scrub_m68k_mri = scrub_m68k_mri;
334 saved->mri_state = mri_state;
335 saved->mri_last_ch = mri_last_ch;
336 #endif
337 #if defined TC_ARM && defined OBJ_ELF
338 saved->symver_state = symver_state;
339 #endif
340 saved->last_char = last_char;
342 /* do_scrub_begin() is not useful, just wastes time. */
344 state = 0;
345 saved_input = NULL;
346 add_newlines = 0;
348 return (char *) saved;
351 void
352 app_pop (char *arg)
354 struct app_save *saved = (struct app_save *) arg;
356 /* There is no do_scrub_end (). */
357 state = saved->state;
358 old_state = saved->old_state;
359 out_string = saved->out_string;
360 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
361 add_newlines = saved->add_newlines;
362 if (saved->saved_input == NULL)
363 saved_input = NULL;
364 else
366 gas_assert (saved->saved_input_len <= sizeof (input_buffer));
367 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
368 saved_input = input_buffer;
369 saved_input_len = saved->saved_input_len;
370 free (saved->saved_input);
372 end_state = saved->end_state;
373 #ifdef TC_M68K
374 scrub_m68k_mri = saved->scrub_m68k_mri;
375 mri_state = saved->mri_state;
376 mri_last_ch = saved->mri_last_ch;
377 #endif
378 #if defined TC_ARM && defined OBJ_ELF
379 symver_state = saved->symver_state;
380 #endif
381 last_char = saved->last_char;
383 free (arg);
/* Translate CH, the character following a backslash in a one-character
   quote, into the character the escape denotes.  The escapes b, f, n,
   r, t, single quote and double quote are recognised; any other
   character is returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
/* Total number of multibyte warnings issued before going quiet, and
   the number issued so far.  */
#define MULTIBYTE_WARN_COUNT_LIMIT 10
static unsigned int multibyte_warn_count = 0;

/* Look for bytes above 0x7f in the range [START, END).

   With WARN false, return true as soon as one such byte is found and
   false if there is none; nothing is reported.

   With WARN true, issue a warning for each such byte, quoting the
   current file and line when as_where provides them, and return true
   if at least one was found.  Once MULTIBYTE_WARN_COUNT_LIMIT warnings
   have been issued in total, a final "suppressed" notice is printed,
   scanning stops, and every later call with WARN true returns false
   without scanning.  */

bool
scan_for_multibyte_characters (const unsigned char *  start,
			       const unsigned char *  end,
			       bool                   warn)
{
  if (end <= start)
    return false;

  /* The limit has been reached once the counter equals it: that is when
     the "suppressed" notice was printed.  Testing with `>' instead would
     let the next buffer produce warnings again.  */
  if (warn && multibyte_warn_count >= MULTIBYTE_WARN_COUNT_LIMIT)
    return false;

  bool found = false;

  while (start < end)
    {
      unsigned char c;

      if ((c = * start++) <= 0x7f)
	continue;

      if (!warn)
	return true;

      found = true;

      const char * filename;
      unsigned int lineno;

      filename = as_where (& lineno);
      if (filename == NULL)
	as_warn (_("multibyte character (%#x) encountered in input"), c);
      else if (lineno == 0)
	as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
      else
	as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);

      if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
	{
	  as_warn (_("further multibyte character warnings suppressed"));
	  break;
	}
    }

  return found;
}
462 /* This function is called to process input characters. The GET
463 parameter is used to retrieve more input characters. GET should
464 set its parameter to point to a buffer, and return the length of
465 the buffer; it should return 0 at end of file. The scrubbed output
466 characters are put into the buffer starting at TOSTART; the TOSTART
467 buffer is TOLEN bytes in length. The function returns the number
468 of scrubbed characters put into TOSTART. This will be TOLEN unless
469 end of file was seen. This function is arranged as a state
470 machine, and saves its state so that it may return at any point.
471 This is the way the old code used to work. */
473 size_t
474 do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
475 bool check_multibyte)
477 char *to = tostart;
478 char *toend = tostart + tolen;
479 char *from;
480 char *fromend;
481 size_t fromlen;
482 int ch, ch2 = 0;
483 /* Character that started the string we're working on. */
484 static char quotechar;
486 /*State 0: beginning of normal line
487 1: After first whitespace on line (flush more white)
488 2: After first non-white (opcode) on line (keep 1white)
489 3: after second white on line (into operands) (flush white)
490 4: after putting out a .linefile, put out digits
491 5: parsing a string, then go to old-state
492 6: putting out \ escape in a "d string.
493 7: no longer used
494 8: no longer used
495 9: After seeing symbol char in state 3 (keep 1white after symchar)
496 10: After seeing whitespace in state 9 (keep white before symchar)
497 11: After seeing a symbol character in state 0 (eg a label definition)
498 -1: output string in out_string and go to the state in old_state
499 12: no longer used
500 #ifdef DOUBLEBAR_PARALLEL
501 13: After seeing a vertical bar, looking for a second
502 vertical bar as a parallel expression separator.
503 #endif
504 #ifdef TC_PREDICATE_START_CHAR
505 14: After seeing a predicate start character at state 0, looking
506 for a predicate end character as predicate.
507 15: After seeing a predicate start character at state 1, looking
508 for a predicate end character as predicate.
509 #endif
510 #ifdef TC_Z80
511 16: After seeing an 'a' or an 'A' at the start of a symbol
512 17: After seeing an 'f' or an 'F' in state 16
513 #endif
516 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
517 constructs like ``.loc 1 20''. This was turning into ``.loc
518 120''. States 9 and 10 ensure that a space is never dropped in
519 between characters which could appear in an identifier. Ian
520 Taylor, ian@cygnus.com.
522 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
523 correctly on the PA (and any other target where colons are optional).
524 Jeff Law, law@cs.utah.edu.
526 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
527 get squashed into "cmp r1,r2||trap#1", with the all important space
528 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
530 /* This macro gets the next input character. */
532 #define GET() \
533 (from < fromend \
534 ? * (unsigned char *) (from++) \
535 : (saved_input = NULL, \
536 fromlen = (*get) (input_buffer, sizeof input_buffer), \
537 from = input_buffer, \
538 fromend = from + fromlen, \
539 (fromlen == 0 \
540 ? EOF \
541 : * (unsigned char *) (from++))))
543 /* This macro pushes a character back on the input stream. */
545 #define UNGET(uch) (*--from = (uch))
547 /* This macro puts a character into the output buffer. If this
548 character fills the output buffer, this macro jumps to the label
549 TOFULL. We use this rather ugly approach because we need to
550 handle two different termination conditions: EOF on the input
551 stream, and a full output buffer. It would be simpler if we
552 always read in the entire input stream before processing it, but
553 I don't want to make such a significant change to the assembler's
554 memory usage. */
556 #define PUT(pch) \
557 do \
559 *to++ = (pch); \
560 if (to >= toend) \
561 goto tofull; \
563 while (0)
565 if (saved_input != NULL)
567 from = saved_input;
568 fromend = from + saved_input_len;
570 else
572 fromlen = (*get) (input_buffer, sizeof input_buffer);
573 if (fromlen == 0)
574 return 0;
575 from = input_buffer;
576 fromend = from + fromlen;
578 if (check_multibyte)
579 (void) scan_for_multibyte_characters ((const unsigned char *) from,
580 (const unsigned char* ) fromend,
581 true /* Generate warnings. */);
584 while (1)
586 /* The cases in this switch end with continue, in order to
587 branch back to the top of this while loop and generate the
588 next output character in the appropriate state. */
589 switch (state)
591 case -1:
592 ch = *out_string++;
593 if (*out_string == '\0')
595 state = old_state;
596 old_state = 3;
598 PUT (ch);
599 continue;
601 case 4:
602 ch = GET ();
603 if (ch == EOF)
604 goto fromeof;
605 else if (ch >= '0' && ch <= '9')
606 PUT (ch);
607 else
609 while (ch != EOF && IS_WHITESPACE (ch))
610 ch = GET ();
611 if (ch == '"')
613 quotechar = ch;
614 state = 5;
615 old_state = 3;
616 PUT (ch);
618 else
620 while (ch != EOF && ch != '\n')
621 ch = GET ();
622 state = 0;
623 PUT (ch);
626 continue;
628 case 5:
629 /* We are going to copy everything up to a quote character,
630 with special handling for a backslash. We try to
631 optimize the copying in the simple case without using the
632 GET and PUT macros. */
634 char *s;
635 ptrdiff_t len;
637 for (s = from; s < fromend; s++)
639 ch = *s;
640 if (ch == '\\'
641 || ch == quotechar
642 || ch == '\n')
643 break;
645 len = s - from;
646 if (len > toend - to)
647 len = toend - to;
648 if (len > 0)
650 memcpy (to, from, len);
651 to += len;
652 from += len;
653 if (to >= toend)
654 goto tofull;
658 ch = GET ();
659 if (ch == EOF)
661 /* This buffer is here specifically so
662 that the UNGET below will work. */
663 static char one_char_buf[1];
665 as_warn (_("end of file in string; '%c' inserted"), quotechar);
666 state = old_state;
667 from = fromend = one_char_buf + 1;
668 fromlen = 1;
669 UNGET ('\n');
670 PUT (quotechar);
672 else if (ch == quotechar)
674 state = old_state;
675 PUT (ch);
677 else if (TC_STRING_ESCAPES && ch == '\\')
679 state = 6;
680 PUT (ch);
682 else if (scrub_m68k_mri && ch == '\n')
684 /* Just quietly terminate the string. This permits lines like
685 bne label loop if we haven't reach end yet. */
686 state = old_state;
687 UNGET (ch);
688 PUT ('\'');
690 else
692 PUT (ch);
694 continue;
696 case 6:
697 state = 5;
698 ch = GET ();
699 switch (ch)
701 /* Handle strings broken across lines, by turning '\n' into
702 '\\' and 'n'. */
703 case '\n':
704 UNGET ('n');
705 add_newlines++;
706 PUT ('\\');
707 continue;
709 case EOF:
710 as_warn (_("end of file in string; '%c' inserted"), quotechar);
711 PUT (quotechar);
712 continue;
714 /* These two are used inside macros. */
715 case '@':
716 case '+':
717 break;
719 case '"':
720 case '\\':
721 case 'b':
722 case 'f':
723 case 'n':
724 case 'r':
725 case 't':
726 case 'v':
727 case 'x':
728 case 'X':
729 case '0':
730 case '1':
731 case '2':
732 case '3':
733 case '4':
734 case '5':
735 case '6':
736 case '7':
737 break;
739 default:
740 #ifdef ONLY_STANDARD_ESCAPES
741 as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
742 #endif
743 break;
745 PUT (ch);
746 continue;
748 #ifdef DOUBLEBAR_PARALLEL
749 case 13:
750 ch = GET ();
751 if (ch != '|')
752 abort ();
754 /* Reset back to state 1 and pretend that we are parsing a
755 line from just after the first white space. */
756 state = 1;
757 PUT ('|');
758 continue;
759 #endif
760 #ifdef TC_Z80
761 case 16:
762 /* We have seen an 'a' at the start of a symbol, look for an 'f'. */
763 ch = GET ();
764 if (ch == 'f' || ch == 'F')
766 state = 17;
767 PUT (ch);
769 else
771 if (ch != EOF)
772 UNGET (ch);
773 state = 9;
774 break;
776 /* Fall through. */
777 case 17:
778 /* We have seen "af" at the start of a symbol,
779 a ' here is a part of that symbol. */
780 ch = GET ();
781 state = 9;
782 if (ch == '\'')
783 /* Change to avoid warning about unclosed string. */
784 PUT ('`');
785 else if (ch != EOF)
786 UNGET (ch);
787 break;
788 #endif
791 /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
793 /* flushchar: */
794 ch = GET ();
796 #ifdef TC_PREDICATE_START_CHAR
797 if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
799 state += 14;
800 PUT (ch);
801 continue;
803 else if (state == 14 || state == 15)
805 if (ch == TC_PREDICATE_END_CHAR)
807 state -= 14;
808 PUT (ch);
809 ch = GET ();
811 else
813 PUT (ch);
814 continue;
817 #endif
819 recycle:
821 /* We need to watch out for .end directives: We should in particular not
822 issue diagnostics for anything after an active one. */
823 if (end_state == NULL)
825 if ((state == 0 || state == 1)
826 && (ch == '.'
827 || (no_pseudo_dot && ch == end_pseudo[0])))
828 end_state = end_pseudo + (ch != '.');
830 else if (ch != '\0'
831 && (*end_state == ch
832 /* Avoid triggering on directives like .endif or .endr. */
833 || (*end_state == ' ' && !IS_SYMBOL_COMPONENT (ch))))
835 if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
836 goto end_end;
837 ++end_state;
839 else if (*end_state != '\0')
840 /* We did not get the expected character, or we didn't
841 get a valid terminating character after seeing the
842 entire pseudo-op, so we must go back to the beginning. */
843 end_state = NULL;
844 else if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
846 end_end:
847 /* We've read the entire pseudo-op. If this is the end of the line,
848 bail out now by (ab)using the output-full path. This allows the
849 caller to process input up to here and terminate processing if this
850 directive is actually active (not on the false branch of a
851 conditional and not in a macro definition). */
852 end_state = NULL;
853 state = 0;
854 PUT (ch);
855 goto tofull;
858 #if defined TC_ARM && defined OBJ_ELF
859 /* We need to watch out for .symver directives. See the comment later
860 in this function. */
861 if (symver_state == NULL)
863 if ((state == 0 || state == 1)
864 && strchr (tc_comment_chars, '@') != NULL
865 && ch == symver_pseudo[0])
866 symver_state = symver_pseudo + 1;
868 else
870 /* We advance to the next state if we find the right
871 character. */
872 if (ch != '\0' && (*symver_state == ch))
873 ++symver_state;
874 else if (*symver_state != '\0')
875 /* We did not get the expected character, or we didn't
876 get a valid terminating character after seeing the
877 entire pseudo-op, so we must go back to the beginning. */
878 symver_state = NULL;
879 else
881 /* We've read the entire pseudo-op. If this is the end
882 of the line, go back to the beginning. */
883 if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
884 symver_state = NULL;
887 #endif /* TC_ARM && OBJ_ELF */
889 #ifdef TC_M68K
890 /* We want to have pseudo-ops which control whether we are in
891 MRI mode or not. Unfortunately, since m68k MRI mode affects
892 the scrubber, that means that we need a special purpose
893 recognizer here. */
894 if (mri_state == NULL)
896 if ((state == 0 || state == 1)
897 && ch == mri_pseudo[0])
898 mri_state = mri_pseudo + 1;
900 else
902 /* We advance to the next state if we find the right
903 character, or if we need a space character and we get any
904 whitespace character, or if we need a '0' and we get a
905 '1' (this is so that we only need one state to handle
906 ``.mri 0'' and ``.mri 1''). */
907 if (ch != '\0'
908 && (*mri_state == ch
909 || (*mri_state == ' '
910 && IS_WHITESPACE (ch))
911 || (*mri_state == '0'
912 && ch == '1')))
914 mri_last_ch = ch;
915 ++mri_state;
917 else if (*mri_state != '\0'
918 || (!IS_WHITESPACE (ch)
919 && !IS_LINE_SEPARATOR (ch)
920 && !IS_NEWLINE (ch)))
922 /* We did not get the expected character, or we didn't
923 get a valid terminating character after seeing the
924 entire pseudo-op, so we must go back to the
925 beginning. */
926 mri_state = NULL;
928 else
930 /* We've read the entire pseudo-op. mri_last_ch is
931 either '0' or '1' indicating whether to enter or
932 leave MRI mode. */
933 do_scrub_begin (mri_last_ch == '1');
934 mri_state = NULL;
936 /* We continue handling the character as usual. The
937 main gas reader must also handle the .mri pseudo-op
938 to control expression parsing and the like. */
941 #endif
943 if (ch == EOF)
945 if (state != 0)
947 as_warn (_("end of file not at end of a line; newline inserted"));
948 state = 0;
949 PUT ('\n');
951 goto fromeof;
954 switch (lex[ch])
956 case LEX_IS_WHITESPACE:
959 ch = GET ();
961 while (ch != EOF && IS_WHITESPACE (ch));
962 if (ch == EOF)
963 goto fromeof;
965 if (state == 0)
967 /* Preserve a single whitespace character at the
968 beginning of a line. */
969 state = 1;
970 UNGET (ch);
971 PUT (' ');
972 break;
975 #ifdef KEEP_WHITE_AROUND_COLON
976 if (lex[ch] == LEX_IS_COLON)
978 /* Only keep this white if there's no white *after* the
979 colon. */
980 ch2 = GET ();
981 if (ch2 != EOF)
982 UNGET (ch2);
983 if (!IS_WHITESPACE (ch2))
985 state = 9;
986 UNGET (ch);
987 PUT (' ');
988 break;
991 #endif
993 /* Prune trailing whitespace. */
994 if (IS_COMMENT (ch)
995 || (IS_LINE_COMMENT (ch)
996 && (state < 1 || strchr (tc_comment_chars, ch)))
997 || IS_NEWLINE (ch)
998 || IS_LINE_SEPARATOR (ch)
999 || IS_PARALLEL_SEPARATOR (ch))
1001 if (scrub_m68k_mri)
1003 /* In MRI mode, we keep these spaces. */
1004 UNGET (ch);
1005 PUT (' ');
1006 break;
1008 goto recycle;
1010 #ifdef DOUBLESLASH_LINE_COMMENTS
1011 if (IS_TWOCHAR_COMMENT_1ST (ch))
1013 ch2 = GET ();
1014 if (ch2 != EOF)
1015 UNGET (ch2);
1016 if (ch2 == '/')
1017 goto recycle;
1019 #endif
1021 /* If we're in state 2 or 11, we've seen a non-white
1022 character followed by whitespace. If the next character
1023 is ':', this is whitespace after a label name which we
1024 normally must ignore. In MRI mode, though, spaces are
1025 not permitted between the label and the colon. */
1026 if ((state == 2 || state == 11)
1027 && lex[ch] == LEX_IS_COLON
1028 && ! scrub_m68k_mri)
1030 state = 1;
1031 PUT (ch);
1032 break;
1035 switch (state)
1037 case 1:
1038 /* We can arrive here if we leave a leading whitespace
1039 character at the beginning of a line. */
1040 goto recycle;
1041 case 2:
1042 state = 3;
1043 if (to + 1 < toend)
1045 /* Optimize common case by skipping UNGET/GET. */
1046 PUT (' '); /* Sp after opco */
1047 goto recycle;
1049 UNGET (ch);
1050 PUT (' ');
1051 break;
1052 case 3:
1053 #ifndef TC_KEEP_OPERAND_SPACES
1054 /* For TI C6X, we keep these spaces as they may separate
1055 functional unit specifiers from operands. */
1056 if (scrub_m68k_mri)
1057 #endif
1059 /* In MRI mode, we keep these spaces. */
1060 UNGET (ch);
1061 PUT (' ');
1062 break;
1064 goto recycle; /* Sp in operands */
1065 case 9:
1066 case 10:
1067 #ifndef TC_KEEP_OPERAND_SPACES
1068 if (scrub_m68k_mri)
1069 #endif
1071 /* In MRI mode, we keep these spaces. */
1072 state = 3;
1073 UNGET (ch);
1074 PUT (' ');
1075 break;
1077 state = 10; /* Sp after symbol char */
1078 goto recycle;
1079 case 11:
1080 if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
1081 state = 1;
1082 else
1084 /* We know that ch is not ':', since we tested that
1085 case above. Therefore this is not a label, so it
1086 must be the opcode, and we've just seen the
1087 whitespace after it. */
1088 state = 3;
1090 UNGET (ch);
1091 PUT (' '); /* Sp after label definition. */
1092 break;
1093 default:
1094 BAD_CASE (state);
1096 break;
1098 case LEX_IS_TWOCHAR_COMMENT_1ST:
1099 ch2 = GET ();
1100 if (ch2 == '*')
1102 twochar_comment:
1103 for (;;)
1107 ch2 = GET ();
1108 if (ch2 != EOF && IS_NEWLINE (ch2))
1109 add_newlines++;
1111 while (ch2 != EOF && ch2 != '*');
1113 while (ch2 == '*')
1114 ch2 = GET ();
1116 if (ch2 == EOF || ch2 == '/')
1117 break;
1119 /* This UNGET will ensure that we count newlines
1120 correctly. */
1121 UNGET (ch2);
1124 if (ch2 == EOF)
1125 as_warn (_("end of file in multiline comment"));
1127 ch = ' ';
1128 goto recycle;
1130 #ifdef DOUBLESLASH_LINE_COMMENTS
1131 else if (ch2 == '/')
1135 ch = GET ();
1137 while (ch != EOF && !IS_NEWLINE (ch));
1138 if (ch == EOF)
1139 as_warn ("end of file in comment; newline inserted");
1140 state = 0;
1141 PUT ('\n');
1142 break;
1144 #endif
1145 else
1147 if (ch2 != EOF)
1148 UNGET (ch2);
1149 if (state == 9 || state == 10)
1150 state = 3;
1151 PUT (ch);
1153 break;
1155 case LEX_IS_STRINGQUOTE:
1156 quotechar = ch;
1157 if (state == 10)
1159 /* Preserve the whitespace in foo "bar". */
1160 UNGET (ch);
1161 state = 3;
1162 PUT (' ');
1164 /* PUT didn't jump out. We could just break, but we
1165 know what will happen, so optimize a bit. */
1166 ch = GET ();
1167 old_state = 9;
1169 else if (state == 3)
1170 old_state = 9;
1171 else if (state == 0)
1172 old_state = 11; /* Now seeing label definition. */
1173 else
1174 old_state = state;
1175 state = 5;
1176 PUT (ch);
1177 break;
1179 case LEX_IS_ONECHAR_QUOTE:
1180 #ifdef H_TICK_HEX
1181 if (state == 9 && enable_h_tick_hex)
1183 char c;
1185 c = GET ();
1186 as_warn ("'%c found after symbol", c);
1187 UNGET (c);
1189 #endif
1190 if (state == 10)
1192 /* Preserve the whitespace in foo 'b'. */
1193 UNGET (ch);
1194 state = 3;
1195 PUT (' ');
1196 break;
1198 ch = GET ();
1199 if (ch == EOF)
1201 as_warn (_("end of file after a one-character quote; \\0 inserted"));
1202 ch = 0;
1204 if (ch == '\\')
1206 ch = GET ();
1207 if (ch == EOF)
1209 as_warn (_("end of file in escape character"));
1210 ch = '\\';
1212 else
1213 ch = process_escape (ch);
1215 sprintf (out_buf, "%d", (int) (unsigned char) ch);
1217 /* None of these 'x constants for us. We want 'x'. */
1218 if ((ch = GET ()) != '\'')
1220 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1221 as_warn (_("missing close quote; (assumed)"));
1222 #else
1223 if (ch != EOF)
1224 UNGET (ch);
1225 #endif
1227 if (strlen (out_buf) == 1)
1229 PUT (out_buf[0]);
1230 break;
1232 if (state == 9)
1233 old_state = 3;
1234 else
1235 old_state = state;
1236 state = -1;
1237 out_string = out_buf;
1238 PUT (*out_string++);
1239 break;
1241 case LEX_IS_COLON:
1242 #ifdef KEEP_WHITE_AROUND_COLON
1243 state = 9;
1244 #else
1245 if (state == 9 || state == 10)
1246 state = 3;
1247 else if (state != 3)
1248 state = 1;
1249 #endif
1250 PUT (ch);
1251 break;
1253 case LEX_IS_NEWLINE:
1254 /* Roll out a bunch of newlines from inside comments, etc. */
1255 if (add_newlines)
1257 --add_newlines;
1258 UNGET (ch);
1260 /* Fall through. */
1262 case LEX_IS_LINE_SEPARATOR:
1263 state = 0;
1264 PUT (ch);
1265 break;
1267 case LEX_IS_PARALLEL_SEPARATOR:
1268 state = 1;
1269 PUT (ch);
1270 break;
1272 #ifdef TC_V850
1273 case LEX_IS_DOUBLEDASH_1ST:
1274 ch2 = GET ();
1275 if (ch2 != '-')
1277 if (ch2 != EOF)
1278 UNGET (ch2);
1279 goto de_fault;
1281 /* Read and skip to end of line. */
1284 ch = GET ();
1286 while (ch != EOF && ch != '\n');
1288 if (ch == EOF)
1289 as_warn (_("end of file in comment; newline inserted"));
1291 state = 0;
1292 PUT ('\n');
1293 break;
1294 #endif
1295 #ifdef DOUBLEBAR_PARALLEL
1296 case LEX_IS_DOUBLEBAR_1ST:
1297 ch2 = GET ();
1298 if (ch2 != EOF)
1299 UNGET (ch2);
1300 if (ch2 != '|')
1301 goto de_fault;
1303 /* Handle '||' in two states as invoking PUT twice might
1304 result in the first one jumping out of this loop. We'd
1305 then lose track of the state and one '|' char. */
1306 state = 13;
1307 PUT ('|');
1308 break;
1309 #endif
1310 case LEX_IS_LINE_COMMENT_START:
1311 /* FIXME-someday: The two character comment stuff was badly
1312 thought out. On i386, we want '/' as line comment start
1313 AND we want C style comments. hence this hack. The
1314 whole lexical process should be reworked. xoxorich. */
1315 if (ch == '/')
1317 ch2 = GET ();
1318 if (ch2 == '*')
1319 goto twochar_comment;
1320 if (ch2 != EOF)
1321 UNGET (ch2);
1324 if (state == 0 || state == 1) /* Only comment at start of line. */
1326 int startch;
1328 startch = ch;
1332 ch = GET ();
1334 while (ch != EOF && IS_WHITESPACE (ch));
1336 if (ch == EOF)
1338 as_warn (_("end of file in comment; newline inserted"));
1339 PUT ('\n');
1340 break;
1343 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1345 /* Not a cpp line. */
1346 while (ch != EOF && !IS_NEWLINE (ch))
1347 ch = GET ();
1348 if (ch == EOF)
1350 as_warn (_("end of file in comment; newline inserted"));
1351 PUT ('\n');
1353 else /* IS_NEWLINE (ch) */
1355 /* To process non-zero add_newlines. */
1356 UNGET (ch);
1358 state = 0;
1359 break;
1361 /* Looks like `# 123 "filename"' from cpp. */
1362 UNGET (ch);
1363 old_state = 4;
1364 state = -1;
1365 if (scrub_m68k_mri)
1366 out_string = "\tlinefile ";
1367 else
1368 out_string = "\t.linefile ";
1369 PUT (*out_string++);
1370 break;
1373 #ifdef TC_D10V
1374 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1375 Trap is the only short insn that has a first operand that is
1376 neither register nor label.
1377 We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1378 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1379 already LEX_IS_LINE_COMMENT_START. However, it is the
1380 only character in line_comment_chars for d10v, hence we
1381 can recognize it as such. */
1382 /* An alternative approach would be to reset the state to 1 when
1383 we see '||', '<'- or '->', but that seems to be overkill. */
1384 if (state == 10)
1385 PUT (' ');
1386 #endif
1387 /* We have a line comment character which is not at the
1388 start of a line. If this is also a normal comment
1389 character, fall through. Otherwise treat it as a default
1390 character. */
1391 if (strchr (tc_comment_chars, ch) == NULL)
1392 goto de_fault;
1393 if (scrub_m68k_mri
1394 && (ch == '!' || ch == '*' || ch == '#'))
1395 goto de_fault;
1396 /* Fall through. */
1397 case LEX_IS_COMMENT_START:
1398 #if defined TC_ARM && defined OBJ_ELF
1399 /* On the ARM, `@' is the comment character.
1400 Unfortunately this is also a special character in ELF .symver
1401 directives (and .type, though we deal with those another way).
1402 So we check if this line is such a directive, and treat
1403 the character as default if so. This is a hack. */
1404 if ((symver_state != NULL) && (*symver_state == 0))
1405 goto de_fault;
1406 #endif
1408 /* Care is needed not to damage occurrences of \<comment-char>
1409 by stripping the <comment-char> onwards. Yuck. */
1410 if ((to > tostart ? to[-1] : last_char) == '\\')
1411 /* Do not treat the <comment-char> as a start-of-comment. */
1412 goto de_fault;
1414 #ifdef WARN_COMMENTS
1415 if (!found_comment)
1416 found_comment_file = as_where (&found_comment);
1417 #endif
1420 ch = GET ();
1422 while (ch != EOF && !IS_NEWLINE (ch));
1423 if (ch == EOF)
1424 as_warn (_("end of file in comment; newline inserted"));
1425 state = 0;
1426 PUT ('\n');
1427 break;
1429 #ifdef H_TICK_HEX
1430 case LEX_IS_H:
1431 /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1432 the H' with 0x to make them gas-style hex characters. */
1433 if (enable_h_tick_hex)
1435 char quot;
1437 quot = GET ();
1438 if (quot == '\'')
1440 UNGET ('x');
1441 ch = '0';
1443 else
1444 UNGET (quot);
1446 #endif
1447 /* Fall through. */
1449 case LEX_IS_SYMBOL_COMPONENT:
1450 if (state == 10)
1452 /* This is a symbol character following another symbol
1453 character, with whitespace in between. We skipped
1454 the whitespace earlier, so output it now. */
1455 UNGET (ch);
1456 state = 3;
1457 PUT (' ');
1458 break;
1461 #ifdef TC_Z80
1462 /* "af'" is a symbol containing '\''. */
1463 if (state == 3 && (ch == 'a' || ch == 'A'))
1465 state = 16;
1466 PUT (ch);
1467 ch = GET ();
1468 if (ch == 'f' || ch == 'F')
1470 state = 17;
1471 PUT (ch);
1472 break;
1474 else
1476 state = 9;
1477 if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1479 if (ch != EOF)
1480 UNGET (ch);
1481 break;
1485 #endif
1486 if (state == 3)
1487 state = 9;
1489 /* This is a common case. Quickly copy CH and all the
1490 following symbol component or normal characters. */
1491 if (to + 1 < toend
1492 #ifdef TC_M68K
1493 && mri_state == NULL
1494 #endif
1495 #if defined TC_ARM && defined OBJ_ELF
1496 && symver_state == NULL
1497 #endif
1498 && end_state == NULL)
1500 char *s;
1501 ptrdiff_t len;
1503 for (s = from; s < fromend; s++)
1505 int type;
1507 ch2 = *(unsigned char *) s;
1508 type = lex[ch2];
1509 if (type != 0
1510 && type != LEX_IS_SYMBOL_COMPONENT)
1511 break;
1514 if (s > from)
1515 /* Handle the last character normally, for
1516 simplicity. */
1517 --s;
1519 len = s - from;
1521 if (len > (toend - to) - 1)
1522 len = (toend - to) - 1;
1524 if (len > 0)
1526 PUT (ch);
1527 memcpy (to, from, len);
1528 to += len;
1529 from += len;
1530 if (to >= toend)
1531 goto tofull;
1532 ch = GET ();
1536 /* Fall through. */
1537 default:
1538 de_fault:
1539 /* Some relatively `normal' character. */
1540 if (state == 0)
1542 state = 11; /* Now seeing label definition. */
1544 else if (state == 1)
1546 state = 2; /* Ditto. */
1548 else if (state == 9)
1550 if (!IS_SYMBOL_COMPONENT (ch))
1551 state = 3;
1553 else if (state == 10)
1555 if (ch == '\\')
1557 /* Special handling for backslash: a backslash may
1558 be the beginning of a formal parameter (of a
1559 macro) following another symbol character, with
1560 whitespace in between. If that is the case, we
1561 output a space before the parameter. Strictly
1562 speaking, correct handling depends upon what the
1563 macro parameter expands into; if the parameter
1564 expands into something which does not start with
1565 an operand character, then we don't want to keep
1566 the space. We don't have enough information to
1567 make the right choice, so here we are making the
1568 choice which is more likely to be correct. */
1569 if (to + 1 >= toend)
1571 /* If we're near the end of the buffer, save the
1572 character for the next time round. Otherwise
1573 we'll lose our state. */
1574 UNGET (ch);
1575 goto tofull;
1577 *to++ = ' ';
1580 state = 3;
1582 PUT (ch);
1583 break;
1587 /*NOTREACHED*/
1589 fromeof:
1590 /* We have reached the end of the input. */
1591 if (to > tostart)
1592 last_char = to[-1];
1593 return to - tostart;
1595 tofull:
1596 /* The output buffer is full. Save any input we have not yet
1597 processed. */
1598 if (fromend > from)
1600 saved_input = from;
1601 saved_input_len = fromend - from;
1603 else
1604 saved_input = NULL;
1606 if (to > tostart)
1607 last_char = to[-1];
1608 return to - tostart;
1611 /* Return amount of pending input. */
1613 size_t
1614 do_scrub_pending (void)
1616 size_t len = 0;
1617 if (saved_input)
1618 len += saved_input_len;
1619 if (state == -1)
1620 len += strlen (out_string);
1621 return len;