Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / x-scheme.c
blob297dda8a218fc9b24e3ec54a0b865ec76de12ad2
1 /* xgettext Scheme backend.
2 Copyright (C) 2004-2005 Free Software Foundation, Inc.
4 This file was written by Bruno Haible <bruno@clisp.org>, 2004-2005.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
24 #include <errno.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
30 #include "message.h"
31 #include "xgettext.h"
32 #include "x-scheme.h"
33 #include "error.h"
34 #include "xalloc.h"
35 #include "exit.h"
36 #include "hash.h"
37 #include "gettext.h"
39 #define _(s) gettext(s)
42 /* The Scheme syntax is described in R5RS. It is implemented in
43 guile-1.6.4/libguile/read.c.
44 Since we are interested only in strings and in forms similar to
45 (gettext msgid ...)
46 or (ngettext msgid msgid_plural ...)
47 we make the following simplifications:
49 - Assume the keywords and strings are in an ASCII compatible encoding.
50 This means we can read the input file one byte at a time, instead of
51 one character at a time. No need to worry about multibyte characters:
52 If they occur as part of identifiers, they most probably act as
53 constituent characters, and the byte based approach will do the same.
55 - Assume the read-hash-procedures is in the default state.
56 Non-standard reader extensions are mostly used to read data, not programs.
58 The remaining syntax rules are:
60 - The syntax code assigned to each character, and how tokens are built
61 up from characters (single escape, multiple escape etc.).
63 - Comment syntax: ';' and '#! ... \n!#\n'.
65 - String syntax: "..." with single escapes.
67 - Read macros and dispatch macro character '#'. Needed to be able to
68 tell which is the n-th argument of a function call.
73 /* ====================== Keyword set customization. ====================== */
75 /* If true extract all strings. */
76 static bool extract_all = false;
78 static hash_table keywords;
79 static bool default_keywords = true;
82 void
83 x_scheme_extract_all ()
85 extract_all = true;
89 void
90 x_scheme_keyword (const char *name)
92 if (name == NULL)
93 default_keywords = false;
94 else
96 const char *end;
97 int argnum1;
98 int argnum2;
99 const char *colon;
101 if (keywords.table == NULL)
102 init_hash (&keywords, 100);
104 split_keywordspec (name, &end, &argnum1, &argnum2);
106 /* The characters between name and end should form a valid Lisp symbol.
107 Extract the symbol name part. */
108 colon = strchr (name, ':');
109 if (colon != NULL && colon < end)
111 name = colon + 1;
112 if (name < end && *name == ':')
113 name++;
114 colon = strchr (name, ':');
115 if (colon != NULL && colon < end)
116 return;
119 if (argnum1 == 0)
120 argnum1 = 1;
121 insert_entry (&keywords, name, end - name,
122 (void *) (long) (argnum1 + (argnum2 << 10)));
126 /* Finish initializing the keywords hash table.
127 Called after argument processing, before each file is processed. */
128 static void
129 init_keywords ()
131 if (default_keywords)
133 x_scheme_keyword ("gettext"); /* libguile/i18n.c */
134 x_scheme_keyword ("ngettext:1,2"); /* libguile/i18n.c */
135 x_scheme_keyword ("gettext-noop");
136 default_keywords = false;
140 void
141 init_flag_table_scheme ()
143 xgettext_record_flag ("gettext:1:pass-scheme-format");
144 xgettext_record_flag ("ngettext:1:pass-scheme-format");
145 xgettext_record_flag ("ngettext:2:pass-scheme-format");
146 xgettext_record_flag ("gettext-noop:1:pass-scheme-format");
147 xgettext_record_flag ("format:2:scheme-format");
151 /* ======================== Reading of characters. ======================== */
153 /* Real filename, used in error messages about the input file. */
154 static const char *real_file_name;
156 /* Logical filename and line number, used to label the extracted messages. */
157 static char *logical_file_name;
158 static int line_number;
160 /* The input file stream. */
161 static FILE *fp;
164 /* Fetch the next character from the input file. */
165 static int
166 do_getc ()
168 int c = getc (fp);
170 if (c == EOF)
172 if (ferror (fp))
173 error (EXIT_FAILURE, errno, _("\
174 error while reading \"%s\""), real_file_name);
176 else if (c == '\n')
177 line_number++;
179 return c;
182 /* Put back the last fetched character, not EOF. */
183 static void
184 do_ungetc (int c)
186 if (c == '\n')
187 line_number--;
188 ungetc (c, fp);
192 /* ========================== Reading of tokens. ========================== */
195 /* A token consists of a sequence of characters. */
196 struct token
198 int allocated; /* number of allocated 'token_char's */
199 int charcount; /* number of used 'token_char's */
200 char *chars; /* the token's constituents */
203 /* Initialize a 'struct token'. */
204 static inline void
205 init_token (struct token *tp)
207 tp->allocated = 10;
208 tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
209 tp->charcount = 0;
212 /* Free the memory pointed to by a 'struct token'. */
213 static inline void
214 free_token (struct token *tp)
216 free (tp->chars);
219 /* Ensure there is enough room in the token for one more character. */
220 static inline void
221 grow_token (struct token *tp)
223 if (tp->charcount == tp->allocated)
225 tp->allocated *= 2;
226 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
230 /* Read the next token. 'first' is the first character, which has already
231 been read. */
232 static void
233 read_token (struct token *tp, int first)
235 init_token (tp);
237 grow_token (tp);
238 tp->chars[tp->charcount++] = first;
240 for (;;)
242 int c = do_getc ();
244 if (c == EOF)
245 break;
246 if (c == ' ' || c == '\r' || c == '\f' || c == '\t' || c == '\n'
247 || c == '"' || c == '(' || c == ')' || c == ';')
249 do_ungetc (c);
250 break;
252 grow_token (tp);
253 tp->chars[tp->charcount++] = c;
257 /* Tests if a token represents an integer.
258 Taken from guile-1.6.4/libguile/numbers.c:scm_istr2int(). */
259 static inline bool
260 is_integer_syntax (const char *str, int len, int radix)
262 const char *p = str;
263 const char *p_end = str + len;
265 /* The accepted syntax is
266 ['+'|'-'] DIGIT+
267 where DIGIT is a hexadecimal digit whose value is below radix. */
269 if (p == p_end)
270 return false;
271 if (*p == '+' || *p == '-')
273 p++;
274 if (p == p_end)
275 return false;
279 int c = *p++;
281 if (c >= '0' && c <= '9')
282 c = c - '0';
283 else if (c >= 'A' && c <= 'F')
284 c = c - 'A' + 10;
285 else if (c >= 'a' && c <= 'f')
286 c = c - 'a' + 10;
287 else
288 return false;
289 if (c >= radix)
290 return false;
292 while (p < p_end);
293 return true;
296 /* Tests if a token represents a rational, floating-point or complex number.
297 If unconstrained is false, only real numbers are accepted; otherwise,
298 complex numbers are accepted as well.
299 Taken from guile-1.6.4/libguile/numbers.c:scm_istr2flo(). */
300 static inline bool
301 is_other_number_syntax (const char *str, int len, int radix, bool unconstrained)
303 const char *p = str;
304 const char *p_end = str + len;
305 bool seen_sign;
306 bool seen_digits;
308 /* The accepted syntaxes are:
309 for a floating-point number:
310 ['+'|'-'] DIGIT+ [EXPONENT]
311 ['+'|'-'] DIGIT* '.' DIGIT+ [EXPONENT]
312 where EXPONENT ::= ['d'|'e'|'f'|'l'|'s'] DIGIT+
313 (Dot and exponent are allowed only if radix is 10.)
314 for a rational number:
315 ['+'|'-'] DIGIT+ '/' DIGIT+
316 for a complex number:
317 REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
318 REAL-NUMBER {'+'|'-'} 'i'
319 {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
320 {'+'|'-'} 'i'
321 REAL-NUMBER '@' REAL-NUMBER
323 if (p == p_end)
324 return false;
325 /* Parse leading sign. */
326 seen_sign = false;
327 if (*p == '+' || *p == '-')
329 p++;
330 if (p == p_end)
331 return false;
332 seen_sign = true;
333 /* Recognize complex number syntax: {'+'|'-'} 'i' */
334 if (unconstrained && (*p == 'I' || *p == 'i') && p + 1 == p_end)
335 return true;
337 /* Parse digits before dot or exponent or slash. */
338 seen_digits = false;
341 int c = *p;
343 if (c >= '0' && c <= '9')
344 c = c - '0';
345 else if (c >= 'A' && c <= 'F')
347 if (c >= 'D' && radix == 10) /* exponent? */
348 break;
349 c = c - 'A' + 10;
351 else if (c >= 'a' && c <= 'f')
353 if (c >= 'd' && radix == 10) /* exponent? */
354 break;
355 c = c - 'a' + 10;
357 else
358 break;
359 if (c >= radix)
360 return false;
361 seen_digits = true;
362 p++;
364 while (p < p_end);
365 /* If p == p_end, we know that seen_digits = true, and the number is an
366 integer without exponent. */
367 if (p < p_end)
369 /* If we have no digits so far, we need a decimal point later. */
370 if (!seen_digits && !(*p == '.' && radix == 10))
371 return false;
372 /* Trailing '#' signs are equivalent to zeroes. */
373 while (p < p_end && *p == '#')
374 p++;
375 if (p < p_end)
377 if (*p == '/')
379 /* Parse digits after the slash. */
380 bool all_zeroes = true;
381 p++;
382 for (; p < p_end; p++)
384 int c = *p;
386 if (c >= '0' && c <= '9')
387 c = c - '0';
388 else if (c >= 'A' && c <= 'F')
389 c = c - 'A' + 10;
390 else if (c >= 'a' && c <= 'f')
391 c = c - 'a' + 10;
392 else
393 break;
394 if (c >= radix)
395 return false;
396 if (c != 0)
397 all_zeroes = false;
399 /* A zero denominator is not allowed. */
400 if (all_zeroes)
401 return false;
402 /* Trailing '#' signs are equivalent to zeroes. */
403 while (p < p_end && *p == '#')
404 p++;
406 else
408 if (*p == '.')
410 /* Decimal point notation. */
411 if (radix != 10)
412 return false;
413 /* Parse digits after the decimal point. */
414 p++;
415 for (; p < p_end; p++)
417 int c = *p;
419 if (c >= '0' && c <= '9')
420 seen_digits = true;
421 else
422 break;
424 /* Digits are required before or after the decimal point. */
425 if (!seen_digits)
426 return false;
427 /* Trailing '#' signs are equivalent to zeroes. */
428 while (p < p_end && *p == '#')
429 p++;
431 if (p < p_end)
433 /* Parse exponent. */
434 switch (*p)
436 case 'D': case 'd':
437 case 'E': case 'e':
438 case 'F': case 'f':
439 case 'L': case 'l':
440 case 'S': case 's':
441 if (radix != 10)
442 return false;
443 p++;
444 if (p == p_end)
445 return false;
446 if (*p == '+' || *p == '-')
448 p++;
449 if (p == p_end)
450 return false;
452 if (!(*p >= '0' && *p <= '9'))
453 return false;
454 for (;;)
456 p++;
457 if (p == p_end)
458 break;
459 if (!(*p >= '0' && *p <= '9'))
460 break;
462 break;
463 default:
464 break;
470 if (p == p_end)
471 return true;
472 /* Recognize complex number syntax. */
473 if (unconstrained)
475 /* Recognize the syntax {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' */
476 if (seen_sign && (*p == 'I' || *p == 'i') && p + 1 == p_end)
477 return true;
478 /* Recognize the syntaxes
479 REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
480 REAL-NUMBER {'+'|'-'} 'i'
482 if (*p == '+' || *p == '-')
483 return (p_end[-1] == 'I' || p_end[-1] == 'i')
484 && (p + 1 == p_end - 1
485 || is_other_number_syntax (p, p_end - 1 - p, radix, false));
486 /* Recognize the syntax REAL-NUMBER '@' REAL-NUMBER */
487 if (*p == '@')
489 p++;
490 return is_other_number_syntax (p, p_end - p, radix, false);
493 return false;
496 /* Tests if a token represents a number.
497 Taken from guile-1.6.4/libguile/numbers.c:scm_istring2number(). */
498 static bool
499 is_number (const struct token *tp)
501 const char *str = tp->chars;
502 int len = tp->charcount;
503 int radix = 10;
504 enum { unknown, exact, inexact } exactness = unknown;
505 bool seen_radix_prefix = false;
506 bool seen_exactness_prefix = false;
508 if (len == 1)
509 if (*str == '+' || *str == '-')
510 return false;
511 while (len >= 2 && *str == '#')
513 switch (str[1])
515 case 'B': case 'b':
516 if (seen_radix_prefix)
517 return false;
518 radix = 2;
519 seen_radix_prefix = true;
520 break;
521 case 'O': case 'o':
522 if (seen_radix_prefix)
523 return false;
524 radix = 8;
525 seen_radix_prefix = true;
526 break;
527 case 'D': case 'd':
528 if (seen_radix_prefix)
529 return false;
530 radix = 10;
531 seen_radix_prefix = true;
532 break;
533 case 'X': case 'x':
534 if (seen_radix_prefix)
535 return false;
536 radix = 16;
537 seen_radix_prefix = true;
538 break;
539 case 'E': case 'e':
540 if (seen_exactness_prefix)
541 return false;
542 exactness = exact;
543 seen_exactness_prefix = true;
544 break;
545 case 'I': case 'i':
546 if (seen_exactness_prefix)
547 return false;
548 exactness = inexact;
549 seen_exactness_prefix = true;
550 break;
551 default:
552 return false;
554 str += 2;
555 len -= 2;
557 if (exactness != inexact)
559 /* Try to parse an integer. */
560 if (is_integer_syntax (str, len, 10))
561 return true;
562 /* FIXME: Other Scheme implementations support exact rational numbers
563 or exact complex numbers. */
565 if (exactness != exact)
567 /* Try to parse a rational, floating-point or complex number. */
568 if (is_other_number_syntax (str, len, 10, true))
569 return true;
571 return false;
575 /* ========================= Accumulating comments ========================= */
578 static char *buffer;
579 static size_t bufmax;
580 static size_t buflen;
582 static inline void
583 comment_start ()
585 buflen = 0;
588 static inline void
589 comment_add (int c)
591 if (buflen >= bufmax)
593 bufmax = 2 * bufmax + 10;
594 buffer = xrealloc (buffer, bufmax);
596 buffer[buflen++] = c;
599 static inline void
600 comment_line_end (size_t chars_to_remove)
602 buflen -= chars_to_remove;
603 while (buflen >= 1
604 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
605 --buflen;
606 if (chars_to_remove == 0 && buflen >= bufmax)
608 bufmax = 2 * bufmax + 10;
609 buffer = xrealloc (buffer, bufmax);
611 buffer[buflen] = '\0';
612 xgettext_comment_add (buffer);
616 /* These are for tracking whether comments count as immediately before
617 keyword. */
618 static int last_comment_line;
619 static int last_non_comment_line;
622 /* ========================= Accumulating messages ========================= */
625 static message_list_ty *mlp;
628 /* ========================== Reading of objects. ========================= */
631 /* We are only interested in symbols (e.g. gettext or ngettext) and strings.
632 Other objects need not to be represented precisely. */
633 enum object_type
635 t_symbol, /* symbol */
636 t_string, /* string */
637 t_other, /* other kind of real object */
638 t_dot, /* '.' pseudo object */
639 t_close, /* ')' pseudo object */
640 t_eof /* EOF marker */
643 struct object
645 enum object_type type;
646 struct token *token; /* for t_symbol and t_string */
647 int line_number_at_start; /* for t_string */
650 /* Free the memory pointed to by a 'struct object'. */
651 static inline void
652 free_object (struct object *op)
654 if (op->type == t_symbol || op->type == t_string)
656 free_token (op->token);
657 free (op->token);
661 /* Convert a t_symbol/t_string token to a char*. */
662 static char *
663 string_of_object (const struct object *op)
665 char *str;
666 int n;
668 if (!(op->type == t_symbol || op->type == t_string))
669 abort ();
670 n = op->token->charcount;
671 str = (char *) xmalloc (n + 1);
672 memcpy (str, op->token->chars, n);
673 str[n] = '\0';
674 return str;
677 /* Context lookup table. */
678 static flag_context_list_table_ty *flag_context_list_table;
680 /* Read the next object. */
681 static void
682 read_object (struct object *op, flag_context_ty outer_context)
684 for (;;)
686 int c = do_getc ();
688 switch (c)
690 case EOF:
691 op->type = t_eof;
692 return;
694 case ' ': case '\r': case '\f': case '\t':
695 continue;
697 case '\n':
698 /* Comments assumed to be grouped with a message must immediately
699 precede it, with no non-whitespace token on a line between
700 both. */
701 if (last_non_comment_line > last_comment_line)
702 xgettext_comment_reset ();
703 continue;
705 case ';':
707 bool all_semicolons = true;
709 last_comment_line = line_number;
710 comment_start ();
711 for (;;)
713 c = do_getc ();
714 if (c == EOF || c == '\n')
715 break;
716 if (c != ';')
717 all_semicolons = false;
718 if (!all_semicolons)
720 /* We skip all leading white space, but not EOLs. */
721 if (!(buflen == 0 && (c == ' ' || c == '\t')))
722 comment_add (c);
725 comment_line_end (0);
726 continue;
729 case '(':
731 int arg = 0; /* Current argument number. */
732 flag_context_list_iterator_ty context_iter;
733 int argnum1 = 0; /* First string position. */
734 int argnum2 = 0; /* Plural string position. */
735 message_ty *plural_mp = NULL; /* Remember the msgid. */
737 for (;; arg++)
739 struct object inner;
740 flag_context_ty inner_context;
742 if (arg == 0)
743 inner_context = null_context;
744 else
745 inner_context =
746 inherited_context (outer_context,
747 flag_context_list_iterator_advance (
748 &context_iter));
750 read_object (&inner, inner_context);
752 /* Recognize end of list. */
753 if (inner.type == t_close)
755 op->type = t_other;
756 last_non_comment_line = line_number;
757 return;
760 /* Dots are not allowed in every position.
761 But be tolerant. */
763 /* EOF inside list is illegal.
764 But be tolerant. */
765 if (inner.type == t_eof)
766 break;
768 if (arg == 0)
770 /* This is the function position. */
771 if (inner.type == t_symbol)
773 char *symbol_name = string_of_object (&inner);
774 void *keyword_value;
776 if (find_entry (&keywords,
777 symbol_name, strlen (symbol_name),
778 &keyword_value)
779 == 0)
781 argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
782 argnum2 = (int) (long) keyword_value >> 10;
785 context_iter =
786 flag_context_list_iterator (
787 flag_context_list_table_lookup (
788 flag_context_list_table,
789 symbol_name, strlen (symbol_name)));
791 free (symbol_name);
793 else
794 context_iter = null_context_list_iterator;
796 else
798 /* These are the argument positions.
799 Extract a string if we have reached the right
800 argument position. */
801 if (arg == argnum1)
803 if (inner.type == t_string)
805 lex_pos_ty pos;
806 message_ty *mp;
808 pos.file_name = logical_file_name;
809 pos.line_number = inner.line_number_at_start;
810 mp = remember_a_message (mlp, string_of_object (&inner),
811 inner_context, &pos);
812 if (argnum2 > 0)
813 plural_mp = mp;
816 else if (arg == argnum2)
818 if (inner.type == t_string && plural_mp != NULL)
820 lex_pos_ty pos;
822 pos.file_name = logical_file_name;
823 pos.line_number = inner.line_number_at_start;
824 remember_a_message_plural (plural_mp, string_of_object (&inner),
825 inner_context, &pos);
830 free_object (&inner);
833 op->type = t_other;
834 last_non_comment_line = line_number;
835 return;
837 case ')':
838 /* Tell the caller about the end of list.
839 Unmatched closing parenthesis is illegal.
840 But be tolerant. */
841 op->type = t_close;
842 last_non_comment_line = line_number;
843 return;
845 case ',':
847 int c = do_getc ();
848 /* The ,@ handling inside lists is wrong anyway, because
849 ,@form expands to an unknown number of elements. */
850 if (c != EOF && c != '@')
851 do_ungetc (c);
853 /*FALLTHROUGH*/
854 case '\'':
855 case '`':
857 struct object inner;
859 read_object (&inner, null_context);
861 /* Dots and EOF are not allowed here. But be tolerant. */
863 free_object (&inner);
865 op->type = t_other;
866 last_non_comment_line = line_number;
867 return;
870 case '#':
871 /* Dispatch macro handling. */
873 c = do_getc ();
874 if (c == EOF)
875 /* Invalid input. Be tolerant, no error message. */
877 op->type = t_other;
878 return;
881 switch (c)
883 case '(': /* Vector */
884 do_ungetc (c);
886 struct object inner;
887 read_object (&inner, null_context);
888 /* Dots and EOF are not allowed here.
889 But be tolerant. */
890 free_object (&inner);
891 op->type = t_other;
892 last_non_comment_line = line_number;
893 return;
896 case 'T': case 't': /* Boolean true */
897 case 'F': case 'f': /* Boolean false */
898 op->type = t_other;
899 last_non_comment_line = line_number;
900 return;
902 case 'B': case 'b':
903 case 'O': case 'o':
904 case 'D': case 'd':
905 case 'X': case 'x':
906 case 'E': case 'e':
907 case 'I': case 'i':
909 struct token token;
910 do_ungetc (c);
911 read_token (&token, '#');
912 if (is_number (&token))
914 /* A number. */
915 free_token (&token);
916 op->type = t_other;
917 last_non_comment_line = line_number;
918 return;
920 else
922 if (token.charcount == 2
923 && (token.chars[1] == 'e' || token.chars[1] == 'i'))
925 c = do_getc ();
926 if (c != EOF)
927 do_ungetc (c);
928 if (c == '(')
929 /* Homogenous vector syntax, see arrays.scm. */
930 case 'a': /* Vectors of char */
931 case 'c': /* Vectors of complex */
932 /*case 'e':*/ /* Vectors of long */
933 case 'h': /* Vectors of short */
934 /*case 'i':*/ /* Vectors of double-float */
935 case 'l': /* Vectors of long long */
936 case 's': /* Vectors of single-float */
937 case 'u': /* Vectors of unsigned long */
938 case 'y': /* Vectors of byte */
940 struct object inner;
941 read_object (&inner, null_context);
942 /* Dots and EOF are not allowed here.
943 But be tolerant. */
944 free_token (&token);
945 free_object (&inner);
946 op->type = t_other;
947 last_non_comment_line = line_number;
948 return;
951 /* Unknown # object. But be tolerant. */
952 free_token (&token);
953 op->type = t_other;
954 last_non_comment_line = line_number;
955 return;
959 case '!':
960 /* Block comment '#! ... \n!#\n'. We don't extract it
961 because it's only used to introduce scripts on Unix. */
963 int last1 = 0;
964 int last2 = 0;
965 int last3 = 0;
967 for (;;)
969 c = do_getc ();
970 if (c == EOF)
971 /* EOF is not allowed here. But be tolerant. */
972 break;
973 if (last3 == '\n' && last2 == '!' && last1 == '#'
974 && c == '\n')
975 break;
976 last3 = last2;
977 last2 = last1;
978 last1 = c;
980 continue;
983 case '*':
984 /* Bit vector. */
986 struct token token;
987 read_token (&token, c);
988 /* The token should consists only of '0' and '1', except
989 for the initial '*'. But be tolerant. */
990 free_token (&token);
991 op->type = t_other;
992 last_non_comment_line = line_number;
993 return;
996 case '{':
997 /* Symbol with multiple escapes: #{...}# */
999 op->token = (struct token *) xmalloc (sizeof (struct token));
1001 init_token (op->token);
1003 for (;;)
1005 c = do_getc ();
1007 if (c == EOF)
1008 break;
1009 if (c == '\\')
1011 c = do_getc ();
1012 if (c == EOF)
1013 break;
1015 else if (c == '}')
1017 c = do_getc ();
1018 if (c == '#')
1019 break;
1020 if (c != EOF)
1021 do_ungetc (c);
1022 c = '}';
1024 grow_token (op->token);
1025 op->token->chars[op->token->charcount++] = c;
1028 op->type = t_symbol;
1029 last_non_comment_line = line_number;
1030 return;
1033 case '\\':
1034 /* Character. */
1036 struct token token;
1037 c = do_getc ();
1038 if (c != EOF)
1040 read_token (&token, c);
1041 free_token (&token);
1043 op->type = t_other;
1044 last_non_comment_line = line_number;
1045 return;
1048 case ':': /* Keyword. */
1049 case '&': /* Deprecated keyword, installed in optargs.scm. */
1051 struct token token;
1052 read_token (&token, '-');
1053 free_token (&token);
1054 op->type = t_other;
1055 last_non_comment_line = line_number;
1056 return;
1059 /* The following are installed through read-hash-extend. */
1061 /* arrays.scm */
1062 case '0': case '1': case '2': case '3': case '4':
1063 case '5': case '6': case '7': case '8': case '9':
1064 /* Multidimensional array syntax: #nx(...) where
1065 n ::= DIGIT+
1066 x ::= {'a'|'b'|'c'|'e'|'i'|'s'|'u'}
1069 c = do_getc ();
1070 while (c >= '0' && c <= '9');
1071 /* c should be one of {'a'|'b'|'c'|'e'|'i'|'s'|'u'}.
1072 But be tolerant. */
1073 /*FALLTHROUGH*/
1074 case '\'': /* boot-9.scm */
1075 case '.': /* boot-9.scm */
1076 case ',': /* srfi-10.scm */
1078 struct object inner;
1079 read_object (&inner, null_context);
1080 /* Dots and EOF are not allowed here.
1081 But be tolerant. */
1082 free_object (&inner);
1083 op->type = t_other;
1084 last_non_comment_line = line_number;
1085 return;
1088 default:
1089 /* Unknown. */
1090 op->type = t_other;
1091 last_non_comment_line = line_number;
1092 return;
1094 /*NOTREACHED*/
1095 abort ();
1098 case '"':
1100 op->token = (struct token *) xmalloc (sizeof (struct token));
1101 init_token (op->token);
1102 op->line_number_at_start = line_number;
1103 for (;;)
1105 int c = do_getc ();
1106 if (c == EOF)
1107 /* Invalid input. Be tolerant, no error message. */
1108 break;
1109 if (c == '"')
1110 break;
1111 if (c == '\\')
1113 c = do_getc ();
1114 if (c == EOF)
1115 /* Invalid input. Be tolerant, no error message. */
1116 break;
1117 switch (c)
1119 case '\n':
1120 continue;
1121 case '0':
1122 c = '\0';
1123 break;
1124 case 'a':
1125 c = '\a';
1126 break;
1127 case 'f':
1128 c = '\f';
1129 break;
1130 case 'n':
1131 c = '\n';
1132 break;
1133 case 'r':
1134 c = '\r';
1135 break;
1136 case 't':
1137 c = '\t';
1138 break;
1139 case 'v':
1140 c = '\v';
1141 break;
1142 default:
1143 break;
1146 grow_token (op->token);
1147 op->token->chars[op->token->charcount++] = c;
1149 op->type = t_string;
1151 if (extract_all)
1153 lex_pos_ty pos;
1155 pos.file_name = logical_file_name;
1156 pos.line_number = op->line_number_at_start;
1157 remember_a_message (mlp, string_of_object (op),
1158 null_context, &pos);
1160 last_non_comment_line = line_number;
1161 return;
1164 case '0': case '1': case '2': case '3': case '4':
1165 case '5': case '6': case '7': case '8': case '9':
1166 case '+': case '-': case '.':
1167 /* Read a number or symbol token. */
1168 op->token = (struct token *) xmalloc (sizeof (struct token));
1169 read_token (op->token, c);
1170 if (op->token->charcount == 1 && op->token->chars[0] == '.')
1172 free_token (op->token);
1173 free (op->token);
1174 op->type = t_dot;
1176 else if (is_number (op->token))
1178 /* A number. */
1179 free_token (op->token);
1180 free (op->token);
1181 op->type = t_other;
1183 else
1185 /* A symbol. */
1186 op->type = t_symbol;
1188 last_non_comment_line = line_number;
1189 return;
1191 case ':':
1192 default:
1193 /* Read a symbol token. */
1194 op->token = (struct token *) xmalloc (sizeof (struct token));
1195 read_token (op->token, c);
1196 op->type = t_symbol;
1197 last_non_comment_line = line_number;
1198 return;
1204 void
1205 extract_scheme (FILE *f,
1206 const char *real_filename, const char *logical_filename,
1207 flag_context_list_table_ty *flag_table,
1208 msgdomain_list_ty *mdlp)
1210 mlp = mdlp->item[0]->messages;
1212 fp = f;
1213 real_file_name = real_filename;
1214 logical_file_name = xstrdup (logical_filename);
1215 line_number = 1;
1217 last_comment_line = -1;
1218 last_non_comment_line = -1;
1220 flag_context_list_table = flag_table;
1222 init_keywords ();
1224 /* Eat tokens until eof is seen. When read_object returns
1225 due to an unbalanced closing parenthesis, just restart it. */
1228 struct object toplevel_object;
1230 read_object (&toplevel_object, null_context);
1232 if (toplevel_object.type == t_eof)
1233 break;
1235 free_object (&toplevel_object);
1237 while (!feof (fp));
1239 /* Close scanner. */
1240 fp = NULL;
1241 real_file_name = NULL;
1242 logical_file_name = NULL;
1243 line_number = 0;