Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / x-elisp.c
blob03251636a26d68706f27fefdf71997448552a562
1 /* xgettext Emacs Lisp backend.
2 Copyright (C) 2001-2003 Free Software Foundation, Inc.
4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001-2002.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
24 #include <errno.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
30 #include "message.h"
31 #include "xgettext.h"
32 #include "x-elisp.h"
33 #include "error.h"
34 #include "xalloc.h"
35 #include "exit.h"
36 #include "hash.h"
37 #include "c-ctype.h"
38 #include "gettext.h"
40 #define _(s) gettext(s)
43 /* Summary of Emacs Lisp syntax:
44 - ';' starts a comment until end of line.
45 - '#@nn' starts a comment of nn bytes.
46 - Integers are constituted of an optional prefix (#b, #B for binary,
47 #o, #O for octal, #x, #X for hexadecimal, #nnr, #nnR for any radix),
48 an optional sign (+ or -), the digits, and an optional trailing dot.
49 - Characters are written as '?' followed by the character, possibly
50 with an escape sequence, for examples '?a', '?\n', '?\177'.
51 - Strings are delimited by double quotes. Backslash introduces an escape
52 sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
53 '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
54 - Symbols: can contain meta-characters if preceded by backslash.
55 - Uninterned symbols: written as #:SYMBOL.
56 - () delimit lists.
57 - [] delimit vectors.
58 The reader is implemented in emacs-21.1/src/lread.c. */
61 /* ====================== Keyword set customization. ====================== */
63 /* If true extract all strings. */
64 static bool extract_all = false;
66 static hash_table keywords;
67 static bool default_keywords = true;
70 void
71 x_elisp_extract_all ()
73 extract_all = true;
77 void
78 x_elisp_keyword (const char *name)
80 if (name == NULL)
81 default_keywords = false;
82 else
84 const char *end;
85 int argnum1;
86 int argnum2;
87 const char *colon;
89 if (keywords.table == NULL)
90 init_hash (&keywords, 100);
92 split_keywordspec (name, &end, &argnum1, &argnum2);
94 /* The characters between name and end should form a valid Lisp
95 symbol. */
96 colon = strchr (name, ':');
97 if (colon == NULL || colon >= end)
99 if (argnum1 == 0)
100 argnum1 = 1;
101 insert_entry (&keywords, name, end - name,
102 (void *) (long) (argnum1 + (argnum2 << 10)));
107 /* Finish initializing the keywords hash table.
108 Called after argument processing, before each file is processed. */
109 static void
110 init_keywords ()
112 if (default_keywords)
114 x_elisp_keyword ("_");
115 default_keywords = false;
119 void
120 init_flag_table_elisp ()
122 xgettext_record_flag ("_:1:pass-elisp-format");
123 xgettext_record_flag ("format:1:elisp-format");
127 /* ======================== Reading of characters. ======================== */
129 /* Real filename, used in error messages about the input file. */
130 static const char *real_file_name;
132 /* Logical filename and line number, used to label the extracted messages. */
133 static char *logical_file_name;
134 static int line_number;
136 /* The input file stream. */
137 static FILE *fp;
140 /* Fetch the next character from the input file. */
141 static int
142 do_getc ()
144 int c = getc (fp);
146 if (c == EOF)
148 if (ferror (fp))
149 error (EXIT_FAILURE, errno, _("\
150 error while reading \"%s\""), real_file_name);
152 else if (c == '\n')
153 line_number++;
155 return c;
158 /* Put back the last fetched character, not EOF. */
159 static void
160 do_ungetc (int c)
162 if (c == '\n')
163 line_number--;
164 ungetc (c, fp);
168 /* ========================== Reading of tokens. ========================== */
171 /* A token consists of a sequence of characters. */
172 struct token
174 int allocated; /* number of allocated 'token_char's */
175 int charcount; /* number of used 'token_char's */
176 char *chars; /* the token's constituents */
179 /* Initialize a 'struct token'. */
180 static inline void
181 init_token (struct token *tp)
183 tp->allocated = 10;
184 tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
185 tp->charcount = 0;
188 /* Free the memory pointed to by a 'struct token'. */
189 static inline void
190 free_token (struct token *tp)
192 free (tp->chars);
195 /* Ensure there is enough room in the token for one more character. */
196 static inline void
197 grow_token (struct token *tp)
199 if (tp->charcount == tp->allocated)
201 tp->allocated *= 2;
202 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
206 /* Test whether a token has integer syntax. */
207 static inline bool
208 is_integer (const char *p)
210 /* NB: Yes, '+.' and '-.' both designate the integer 0. */
211 const char *p_start = p;
213 if (*p == '+' || *p == '-')
214 p++;
215 if (*p == '\0')
216 return false;
217 while (*p >= '0' && *p <= '9')
218 p++;
219 if (p > p_start && *p == '.')
220 p++;
221 return (*p == '\0');
224 /* Test whether a token has float syntax. */
225 static inline bool
226 is_float (const char *p)
228 enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 };
229 int state;
231 state = 0;
232 if (*p == '+' || *p == '-')
233 p++;
234 if (*p >= '0' && *p <= '9')
236 state |= LEAD_INT;
238 p++;
239 while (*p >= '0' && *p <= '9');
241 if (*p == '.')
243 state |= DOT_CHAR;
244 p++;
246 if (*p >= '0' && *p <= '9')
248 state |= TRAIL_INT;
250 p++;
251 while (*p >= '0' && *p <= '9');
253 if (*p == 'e' || *p == 'E')
255 state |= E_CHAR;
256 p++;
257 if (*p == '+' || *p == '-')
258 p++;
259 if (*p >= '0' && *p <= '9')
261 state |= EXP_INT;
263 p++;
264 while (*p >= '0' && *p <= '9');
266 else if (p[-1] == '+'
267 && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F')
268 || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N')))
270 state |= EXP_INT;
271 p += 3;
274 return (*p == '\0')
275 && (state == (LEAD_INT | DOT_CHAR | TRAIL_INT)
276 || state == (DOT_CHAR | TRAIL_INT)
277 || state == (LEAD_INT | E_CHAR | EXP_INT)
278 || state == (LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT)
279 || state == (DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT));
282 /* Read the next token. 'first' is the first character, which has already
283 been read. Returns true for a symbol, false for a number. */
284 static bool
285 read_token (struct token *tp, int first)
287 int c;
288 bool quoted = false;
290 init_token (tp);
292 c = first;
294 for (;; c = do_getc ())
296 if (c == EOF)
297 break;
298 if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
299 break;
300 if (c == '\"' || c == '\'' || c == ';' || c == '(' || c == ')'
301 || c == '[' || c == ']' || c == '#')
302 break;
303 if (c == '\\')
305 quoted = true;
306 c = do_getc ();
307 if (c == EOF)
308 /* Invalid, but be tolerant. */
309 break;
311 grow_token (tp);
312 tp->chars[tp->charcount++] = c;
314 if (c != EOF)
315 do_ungetc (c);
317 if (quoted)
318 return true; /* symbol */
320 /* Add a NUL byte at the end, for is_integer and is_float. */
321 grow_token (tp);
322 tp->chars[tp->charcount] = '\0';
324 if (is_integer (tp->chars) || is_float (tp->chars))
325 return false; /* number */
326 else
327 return true; /* symbol */
331 /* ========================= Accumulating comments ========================= */
334 static char *buffer;
335 static size_t bufmax;
336 static size_t buflen;
338 static inline void
339 comment_start ()
341 buflen = 0;
344 static inline void
345 comment_add (int c)
347 if (buflen >= bufmax)
349 bufmax = 2 * bufmax + 10;
350 buffer = xrealloc (buffer, bufmax);
352 buffer[buflen++] = c;
355 static inline void
356 comment_line_end (size_t chars_to_remove)
358 buflen -= chars_to_remove;
359 while (buflen >= 1
360 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
361 --buflen;
362 if (chars_to_remove == 0 && buflen >= bufmax)
364 bufmax = 2 * bufmax + 10;
365 buffer = xrealloc (buffer, bufmax);
367 buffer[buflen] = '\0';
368 xgettext_comment_add (buffer);
372 /* These are for tracking whether comments count as immediately before
373 keyword. */
374 static int last_comment_line;
375 static int last_non_comment_line;
378 /* ========================= Accumulating messages ========================= */
381 static message_list_ty *mlp;
384 /* ============== Reading of objects. See CLHS 2 "Syntax". ============== */
387 /* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
388 Other objects need not to be represented precisely. */
389 enum object_type
391 t_symbol, /* symbol */
392 t_string, /* string */
393 t_other, /* other kind of real object */
394 t_dot, /* '.' pseudo object */
395 t_listclose, /* ')' pseudo object */
396 t_vectorclose,/* ']' pseudo object */
397 t_eof /* EOF marker */
400 struct object
402 enum object_type type;
403 struct token *token; /* for t_symbol and t_string */
404 int line_number_at_start; /* for t_string */
407 /* Free the memory pointed to by a 'struct object'. */
408 static inline void
409 free_object (struct object *op)
411 if (op->type == t_symbol || op->type == t_string)
413 free_token (op->token);
414 free (op->token);
418 /* Convert a t_symbol/t_string token to a char*. */
419 static char *
420 string_of_object (const struct object *op)
422 char *str;
423 int n;
425 if (!(op->type == t_symbol || op->type == t_string))
426 abort ();
427 n = op->token->charcount;
428 str = (char *) xmalloc (n + 1);
429 memcpy (str, op->token->chars, n);
430 str[n] = '\0';
431 return str;
434 /* Context lookup table. */
435 static flag_context_list_table_ty *flag_context_list_table;
437 /* Returns the character represented by an escape sequence. */
438 #define IGNORABLE_ESCAPE (EOF - 1)
439 static int
440 do_getc_escaped (int c, bool in_string)
442 switch (c)
444 case 'a':
445 return '\a';
446 case 'b':
447 return '\b';
448 case 'd':
449 return 0x7F;
450 case 'e':
451 return 0x1B;
452 case 'f':
453 return '\f';
454 case 'n':
455 return '\n';
456 case 'r':
457 return '\r';
458 case 't':
459 return '\t';
460 case 'v':
461 return '\v';
463 case '\n':
464 return IGNORABLE_ESCAPE;
466 case ' ':
467 return (in_string ? IGNORABLE_ESCAPE : ' ');
469 case 'M': /* meta */
470 c = do_getc ();
471 if (c == EOF)
472 return EOF;
473 if (c != '-')
474 /* Invalid input. But be tolerant. */
475 return c;
476 c = do_getc ();
477 if (c == EOF)
478 return EOF;
479 if (c == '\\')
481 c = do_getc ();
482 if (c == EOF)
483 return EOF;
484 c = do_getc_escaped (c, false);
486 return c | 0x80;
488 case 'S': /* shift */
489 c = do_getc ();
490 if (c == EOF)
491 return EOF;
492 if (c != '-')
493 /* Invalid input. But be tolerant. */
494 return c;
495 c = do_getc ();
496 if (c == EOF)
497 return EOF;
498 if (c == '\\')
500 c = do_getc ();
501 if (c == EOF)
502 return EOF;
503 c = do_getc_escaped (c, false);
505 return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
507 case 'H': /* hyper */
508 case 'A': /* alt */
509 case 's': /* super */
510 c = do_getc ();
511 if (c == EOF)
512 return EOF;
513 if (c != '-')
514 /* Invalid input. But be tolerant. */
515 return c;
516 c = do_getc ();
517 if (c == EOF)
518 return EOF;
519 if (c == '\\')
521 c = do_getc ();
522 if (c == EOF)
523 return EOF;
524 c = do_getc_escaped (c, false);
526 return c;
528 case 'C': /* ctrl */
529 c = do_getc ();
530 if (c == EOF)
531 return EOF;
532 if (c != '-')
533 /* Invalid input. But be tolerant. */
534 return c;
535 /*FALLTHROUGH*/
536 case '^':
537 c = do_getc ();
538 if (c == EOF)
539 return EOF;
540 if (c == '\\')
542 c = do_getc ();
543 if (c == EOF)
544 return EOF;
545 c = do_getc_escaped (c, false);
547 if (c == '?')
548 return 0x7F;
549 if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A)
550 return c & 0x9F;
551 if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F)
552 return c & 0x9F;
553 #if 0 /* We cannot handle NUL bytes in strings. */
554 if (c == ' ')
555 return 0x00;
556 #endif
557 return c;
559 case '0': case '1': case '2': case '3': case '4':
560 case '5': case '6': case '7':
561 /* An octal escape, as in ANSI C. */
563 int n = c - '0';
565 c = do_getc ();
566 if (c != EOF)
568 if (c >= '0' && c <= '7')
570 n = (n << 3) + (c - '0');
571 c = do_getc ();
572 if (c != EOF)
574 if (c >= '0' && c <= '7')
575 n = (n << 3) + (c - '0');
576 else
577 do_ungetc (c);
580 else
581 do_ungetc (c);
583 return (unsigned char) n;
586 case 'x':
587 /* A hexadecimal escape, as in ANSI C. */
589 int n = 0;
591 for (;;)
593 c = do_getc ();
594 if (c == EOF)
595 break;
596 else if (c >= '0' && c <= '9')
597 n = (n << 4) + (c - '0');
598 else if (c >= 'A' && c <= 'F')
599 n = (n << 4) + (c - 'A' + 10);
600 else if (c >= 'a' && c <= 'f')
601 n = (n << 4) + (c - 'a' + 10);
602 else
604 do_ungetc (c);
605 break;
608 return (unsigned char) n;
611 default:
612 /* Ignore Emacs multibyte character stuff. All the strings we are
613 interested in are ASCII strings. */
614 return c;
618 /* Read the next object.
619 'first_in_list' and 'new_backquote_flag' are used for reading old
620 backquote syntax and new backquote syntax. */
621 static void
622 read_object (struct object *op, bool first_in_list, bool new_backquote_flag,
623 flag_context_ty outer_context)
625 for (;;)
627 int c;
629 c = do_getc ();
631 switch (c)
633 case EOF:
634 op->type = t_eof;
635 return;
637 case '\n':
638 /* Comments assumed to be grouped with a message must immediately
639 precede it, with no non-whitespace token on a line between
640 both. */
641 if (last_non_comment_line > last_comment_line)
642 xgettext_comment_reset ();
643 continue;
645 case '(':
647 int arg = 0; /* Current argument number. */
648 flag_context_list_iterator_ty context_iter;
649 int argnum1 = 0; /* First string position. */
650 int argnum2 = 0; /* Plural string position. */
651 message_ty *plural_mp = NULL; /* Remember the msgid. */
653 for (;; arg++)
655 struct object inner;
656 flag_context_ty inner_context;
658 if (arg == 0)
659 inner_context = null_context;
660 else
661 inner_context =
662 inherited_context (outer_context,
663 flag_context_list_iterator_advance (
664 &context_iter));
666 read_object (&inner, arg == 0, new_backquote_flag,
667 inner_context);
669 /* Recognize end of list. */
670 if (inner.type == t_listclose)
672 op->type = t_other;
673 /* Don't bother converting "()" to "NIL". */
674 last_non_comment_line = line_number;
675 return;
678 /* Dots are not allowed in every position. ']' is not allowed.
679 But be tolerant. */
681 /* EOF inside list is illegal. But be tolerant. */
682 if (inner.type == t_eof)
683 break;
685 if (arg == 0)
687 /* This is the function position. */
688 if (inner.type == t_symbol)
690 char *symbol_name = string_of_object (&inner);
691 void *keyword_value;
693 if (find_entry (&keywords,
694 symbol_name, strlen (symbol_name),
695 &keyword_value)
696 == 0)
698 argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
699 argnum2 = (int) (long) keyword_value >> 10;
702 context_iter =
703 flag_context_list_iterator (
704 flag_context_list_table_lookup (
705 flag_context_list_table,
706 symbol_name, strlen (symbol_name)));
708 free (symbol_name);
710 else
711 context_iter = null_context_list_iterator;
713 else
715 /* These are the argument positions.
716 Extract a string if we have reached the right
717 argument position. */
718 if (arg == argnum1)
720 if (inner.type == t_string)
722 lex_pos_ty pos;
723 message_ty *mp;
725 pos.file_name = logical_file_name;
726 pos.line_number = inner.line_number_at_start;
727 mp = remember_a_message (mlp, string_of_object (&inner),
728 inner_context, &pos);
729 if (argnum2 > 0)
730 plural_mp = mp;
733 else if (arg == argnum2)
735 if (inner.type == t_string && plural_mp != NULL)
737 lex_pos_ty pos;
739 pos.file_name = logical_file_name;
740 pos.line_number = inner.line_number_at_start;
741 remember_a_message_plural (plural_mp, string_of_object (&inner),
742 inner_context, &pos);
747 free_object (&inner);
750 op->type = t_other;
751 last_non_comment_line = line_number;
752 return;
754 case ')':
755 /* Tell the caller about the end of list.
756 Unmatched closing parenthesis is illegal. But be tolerant. */
757 op->type = t_listclose;
758 last_non_comment_line = line_number;
759 return;
761 case '[':
763 for (;;)
765 struct object inner;
767 read_object (&inner, false, new_backquote_flag, null_context);
769 /* Recognize end of vector. */
770 if (inner.type == t_vectorclose)
772 op->type = t_other;
773 last_non_comment_line = line_number;
774 return;
777 /* Dots and ')' are not allowed. But be tolerant. */
779 /* EOF inside vector is illegal. But be tolerant. */
780 if (inner.type == t_eof)
781 break;
783 free_object (&inner);
786 op->type = t_other;
787 last_non_comment_line = line_number;
788 return;
790 case ']':
791 /* Tell the caller about the end of vector.
792 Unmatched closing bracket is illegal. But be tolerant. */
793 op->type = t_vectorclose;
794 last_non_comment_line = line_number;
795 return;
797 case '\'':
799 struct object inner;
801 read_object (&inner, false, new_backquote_flag, null_context);
803 /* Dots and EOF are not allowed here. But be tolerant. */
805 free_object (&inner);
807 op->type = t_other;
808 last_non_comment_line = line_number;
809 return;
812 case '`':
813 if (first_in_list)
814 goto default_label;
816 struct object inner;
818 read_object (&inner, false, true, null_context);
820 /* Dots and EOF are not allowed here. But be tolerant. */
822 free_object (&inner);
824 op->type = t_other;
825 last_non_comment_line = line_number;
826 return;
829 case ',':
830 if (!new_backquote_flag)
831 goto default_label;
833 int c = do_getc ();
834 /* The ,@ handling inside lists is wrong anyway, because
835 ,@form expands to an unknown number of elements. */
836 if (c != EOF && c != '@' && c != '.')
837 do_ungetc (c);
840 struct object inner;
842 read_object (&inner, false, false, null_context);
844 /* Dots and EOF are not allowed here. But be tolerant. */
846 free_object (&inner);
848 op->type = t_other;
849 last_non_comment_line = line_number;
850 return;
853 case ';':
855 bool all_semicolons = true;
857 last_comment_line = line_number;
858 comment_start ();
859 for (;;)
861 int c = do_getc ();
862 if (c == EOF || c == '\n')
863 break;
864 if (c != ';')
865 all_semicolons = false;
866 if (!all_semicolons)
868 /* We skip all leading white space, but not EOLs. */
869 if (!(buflen == 0 && (c == ' ' || c == '\t')))
870 comment_add (c);
873 comment_line_end (0);
874 continue;
877 case '"':
879 op->token = (struct token *) xmalloc (sizeof (struct token));
880 init_token (op->token);
881 op->line_number_at_start = line_number;
882 for (;;)
884 int c = do_getc ();
885 if (c == EOF)
886 /* Invalid input. Be tolerant, no error message. */
887 break;
888 if (c == '"')
889 break;
890 if (c == '\\')
892 c = do_getc ();
893 if (c == EOF)
894 /* Invalid input. Be tolerant, no error message. */
895 break;
896 c = do_getc_escaped (c, true);
897 if (c == EOF)
898 /* Invalid input. Be tolerant, no error message. */
899 break;
900 if (c == IGNORABLE_ESCAPE)
901 /* Ignore escaped newline and escaped space. */
903 else
905 grow_token (op->token);
906 op->token->chars[op->token->charcount++] = c;
909 else
911 grow_token (op->token);
912 op->token->chars[op->token->charcount++] = c;
915 op->type = t_string;
917 if (extract_all)
919 lex_pos_ty pos;
921 pos.file_name = logical_file_name;
922 pos.line_number = op->line_number_at_start;
923 remember_a_message (mlp, string_of_object (op),
924 null_context, &pos);
926 last_non_comment_line = line_number;
927 return;
930 case '?':
931 c = do_getc ();
932 if (c == EOF)
933 /* Invalid input. Be tolerant, no error message. */
935 else if (c == '\\')
937 c = do_getc ();
938 if (c == EOF)
939 /* Invalid input. Be tolerant, no error message. */
941 else
943 c = do_getc_escaped (c, false);
944 if (c == EOF)
945 /* Invalid input. Be tolerant, no error message. */
949 /* Impossible to deal with Emacs multibyte character stuff here. */
950 op->type = t_other;
951 last_non_comment_line = line_number;
952 return;
954 case '#':
955 /* Dispatch macro handling. */
956 c = do_getc ();
957 if (c == EOF)
958 /* Invalid input. Be tolerant, no error message. */
960 op->type = t_other;
961 return;
964 switch (c)
966 case '^':
967 c = do_getc ();
968 if (c == '^')
969 c = do_getc ();
970 if (c == '[')
972 /* Read a char table, same syntax as a vector. */
973 for (;;)
975 struct object inner;
977 read_object (&inner, false, new_backquote_flag,
978 null_context);
980 /* Recognize end of vector. */
981 if (inner.type == t_vectorclose)
983 op->type = t_other;
984 last_non_comment_line = line_number;
985 return;
988 /* Dots and ')' are not allowed. But be tolerant. */
990 /* EOF inside vector is illegal. But be tolerant. */
991 if (inner.type == t_eof)
992 break;
994 free_object (&inner);
996 op->type = t_other;
997 last_non_comment_line = line_number;
998 return;
1000 else
1001 /* Invalid input. Be tolerant, no error message. */
1003 op->type = t_other;
1004 if (c != EOF)
1005 last_non_comment_line = line_number;
1006 return;
1009 case '&':
1010 /* Read a bit vector. */
1012 struct object length;
1013 read_object (&length, first_in_list, new_backquote_flag,
1014 null_context);
1015 /* Dots and EOF are not allowed here.
1016 But be tolerant. */
1017 free_object (&length);
1019 c = do_getc ();
1020 if (c == '"')
1022 struct object string;
1023 read_object (&string, first_in_list, new_backquote_flag,
1024 null_context);
1025 free_object (&string);
1027 else
1028 /* Invalid input. Be tolerant, no error message. */
1029 do_ungetc (c);
1030 op->type = t_other;
1031 last_non_comment_line = line_number;
1032 return;
1034 case '[':
1035 /* Read a compiled function, same syntax as a vector. */
1036 case '(':
1037 /* Read a string with properties, same syntax as a list. */
1039 struct object inner;
1040 do_ungetc (c);
1041 read_object (&inner, false, new_backquote_flag, null_context);
1042 /* Dots and EOF are not allowed here.
1043 But be tolerant. */
1044 free_object (&inner);
1045 op->type = t_other;
1046 last_non_comment_line = line_number;
1047 return;
1050 case '@':
1051 /* Read a comment consisting of a given number of bytes. */
1053 unsigned int nskip = 0;
1055 for (;;)
1057 c = do_getc ();
1058 if (!(c >= '0' && c <= '9'))
1059 break;
1060 nskip = 10 * nskip + (c - '0');
1062 if (c != EOF)
1064 do_ungetc (c);
1065 for (; nskip > 0; nskip--)
1066 if (do_getc () == EOF)
1067 break;
1069 continue;
1072 case '$':
1073 op->type = t_other;
1074 last_non_comment_line = line_number;
1075 return;
1077 case '\'':
1078 case ':':
1079 case 'S': case 's': /* XEmacs only */
1081 struct object inner;
1082 read_object (&inner, false, new_backquote_flag, null_context);
1083 /* Dots and EOF are not allowed here.
1084 But be tolerant. */
1085 free_object (&inner);
1086 op->type = t_other;
1087 last_non_comment_line = line_number;
1088 return;
1091 case '0': case '1': case '2': case '3': case '4':
1092 case '5': case '6': case '7': case '8': case '9':
1093 /* Read Common Lisp style #n# or #n=. */
1094 for (;;)
1096 c = do_getc ();
1097 if (!(c >= '0' && c <= '9'))
1098 break;
1100 if (c == EOF)
1101 /* Invalid input. Be tolerant, no error message. */
1103 op->type = t_other;
1104 return;
1106 if (c == '=')
1108 read_object (op, false, new_backquote_flag, outer_context);
1109 last_non_comment_line = line_number;
1110 return;
1112 if (c == '#')
1114 op->type = t_other;
1115 last_non_comment_line = line_number;
1116 return;
1118 if (c == 'R' || c == 'r')
1120 /* Read an integer. */
1121 c = do_getc ();
1122 if (c == '+' || c == '-')
1123 c = do_getc ();
1124 for (; c != EOF; c = do_getc ())
1125 if (!c_isalnum (c))
1127 do_ungetc (c);
1128 break;
1130 op->type = t_other;
1131 last_non_comment_line = line_number;
1132 return;
1134 /* Invalid input. Be tolerant, no error message. */
1135 op->type = t_other;
1136 last_non_comment_line = line_number;
1137 return;
1139 case 'X': case 'x':
1140 case 'O': case 'o':
1141 case 'B': case 'b':
1143 /* Read an integer. */
1144 c = do_getc ();
1145 if (c == '+' || c == '-')
1146 c = do_getc ();
1147 for (; c != EOF; c = do_getc ())
1148 if (!c_isalnum (c))
1150 do_ungetc (c);
1151 break;
1153 op->type = t_other;
1154 last_non_comment_line = line_number;
1155 return;
1158 case '*': /* XEmacs only */
1160 /* Read a bit-vector. */
1162 c = do_getc ();
1163 while (c == '0' || c == '1');
1164 if (c != EOF)
1165 do_ungetc (c);
1166 op->type = t_other;
1167 last_non_comment_line = line_number;
1168 return;
1171 case '+': /* XEmacs only */
1172 case '-': /* XEmacs only */
1173 /* Simply assume every feature expression is true. */
1175 struct object inner;
1176 read_object (&inner, false, new_backquote_flag, null_context);
1177 /* Dots and EOF are not allowed here.
1178 But be tolerant. */
1179 free_object (&inner);
1180 continue;
1183 default:
1184 /* Invalid input. Be tolerant, no error message. */
1185 op->type = t_other;
1186 last_non_comment_line = line_number;
1187 return;
1190 /*NOTREACHED*/
1191 abort ();
1193 case '.':
1194 c = do_getc ();
1195 if (c != EOF)
1197 do_ungetc (c);
1198 if (c <= ' ' /* FIXME: Assumes ASCII compatible encoding */
1199 || strchr ("\"'`,(", c) != NULL)
1201 op->type = t_dot;
1202 last_non_comment_line = line_number;
1203 return;
1206 c = '.';
1207 /*FALLTHROUGH*/
1208 default:
1209 default_label:
1210 if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
1211 continue;
1212 /* Read a token. */
1214 bool symbol;
1216 op->token = (struct token *) xmalloc (sizeof (struct token));
1217 symbol = read_token (op->token, c);
1218 if (symbol)
1220 op->type = t_symbol;
1221 last_non_comment_line = line_number;
1222 return;
1224 else
1226 free_token (op->token);
1227 free (op->token);
1228 op->type = t_other;
1229 last_non_comment_line = line_number;
1230 return;
1238 void
1239 extract_elisp (FILE *f,
1240 const char *real_filename, const char *logical_filename,
1241 flag_context_list_table_ty *flag_table,
1242 msgdomain_list_ty *mdlp)
1244 mlp = mdlp->item[0]->messages;
1246 fp = f;
1247 real_file_name = real_filename;
1248 logical_file_name = xstrdup (logical_filename);
1249 line_number = 1;
1251 last_comment_line = -1;
1252 last_non_comment_line = -1;
1254 flag_context_list_table = flag_table;
1256 init_keywords ();
1258 /* Eat tokens until eof is seen. When read_object returns
1259 due to an unbalanced closing parenthesis, just restart it. */
1262 struct object toplevel_object;
1264 read_object (&toplevel_object, false, false, null_context);
1266 if (toplevel_object.type == t_eof)
1267 break;
1269 free_object (&toplevel_object);
1271 while (!feof (fp));
1273 /* Close scanner. */
1274 fp = NULL;
1275 real_file_name = NULL;
1276 logical_file_name = NULL;
1277 line_number = 0;