Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / x-sh.c
blob66faadd78288d93d24c2553c8956b6a2b3ce4c99
1 /* xgettext sh backend.
2 Copyright (C) 2003 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2003.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
23 #include <errno.h>
24 #include <limits.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
30 #include "message.h"
31 #include "xgettext.h"
32 #include "x-sh.h"
33 #include "error.h"
34 #include "xalloc.h"
35 #include "exit.h"
36 #include "hash.h"
37 #include "gettext.h"
39 #define _(s) gettext(s)
/* Number of elements of the array A.  Only meaningful for true arrays,
   not for pointers.  The argument is parenthesized so that an expression
   argument is indexed as a whole.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
/* The sh syntax is defined in POSIX:2001, see
     http://www.opengroup.org/onlinepubs/007904975/utilities/xcu_chap02.html
   Summary of sh syntax:
   - Input is broken into words, which are then subject to
     - tilde expansion ~...
     - command substitution `...`
     - variable substitution $var
     - arithmetic substitution $((...))
     - field splitting at whitespace (IFS)
     - wildcard pattern expansion *?
     - quote removal
   - Strings are enclosed in "..."; command substitution, variable
     substitution and arithmetic substitution are performed here as well.
   - '...' is a string without substitutions.
   - The list of resulting words is split into commands by semicolon and
     newline.
   - '#' at the beginning of a word introduces a comment until end of line.
   The parser is implemented in bash-2.05b/parse.y.  */
/* ====================== Keyword set customization.  ====================== */

/* If true extract all strings.  */
static bool extract_all = false;

/* Maps each registered keyword name to its argument positions, packed by
   x_sh_keyword as argnum1 + (argnum2 << 10).  */
static hash_table keywords;

/* True until the built-in keywords have been registered by init_keywords
   or have been disabled through x_sh_keyword (NULL).  */
static bool default_keywords = true;
73 void
74 x_sh_extract_all ()
76 extract_all = true;
80 void
81 x_sh_keyword (const char *name)
83 if (name == NULL)
84 default_keywords = false;
85 else
87 const char *end;
88 int argnum1;
89 int argnum2;
90 const char *colon;
92 if (keywords.table == NULL)
93 init_hash (&keywords, 100);
95 split_keywordspec (name, &end, &argnum1, &argnum2);
97 /* The characters between name and end should form a valid C identifier.
98 A colon means an invalid parse in split_keywordspec(). */
99 colon = strchr (name, ':');
100 if (colon == NULL || colon >= end)
102 if (argnum1 == 0)
103 argnum1 = 1;
104 insert_entry (&keywords, name, end - name,
105 (void *) (long) (argnum1 + (argnum2 << 10)));
110 /* Finish initializing the keywords hash table.
111 Called after argument processing, before each file is processed. */
112 static void
113 init_keywords ()
115 if (default_keywords)
117 x_sh_keyword ("gettext");
118 x_sh_keyword ("ngettext:1,2");
119 x_sh_keyword ("eval_gettext");
120 x_sh_keyword ("eval_ngettext:1,2");
121 default_keywords = false;
/* Record the format-string flags of the built-in sh keywords, one
   xgettext_record_flag() call per "function:argument:flag" entry.  */
void
init_flag_table_sh ()
{
  static const char *const flag_specs[] =
    {
      "gettext:1:pass-sh-format",
      "ngettext:1:pass-sh-format",
      "ngettext:2:pass-sh-format",
      "eval_gettext:1:sh-format",
      "eval_ngettext:1:sh-format",
      "eval_ngettext:2:sh-format"
    };
  size_t i;

  for (i = 0; i < sizeof (flag_specs) / sizeof (flag_specs[0]); i++)
    xgettext_record_flag (flag_specs[i]);
}
/* ======================== Reading of characters.  ======================== */

/* Real filename, used in error messages about the input file.  */
static const char *real_file_name;

/* Logical filename and line number, used to label the extracted messages.
   line_number is kept up to date by the getc/ungetc functions below.  */
static char *logical_file_name;
static int line_number;

/* The input file stream.  */
static FILE *fp;
150 /* Fetch the next character from the input file. */
151 static int
152 do_getc ()
154 int c = getc (fp);
156 if (c == EOF)
158 if (ferror (fp))
159 error (EXIT_FAILURE, errno, _("\
160 error while reading \"%s\""), real_file_name);
162 else if (c == '\n')
163 line_number++;
165 return c;
168 /* Put back the last fetched character, not EOF. */
169 static void
170 do_ungetc (int c)
172 if (c == '\n')
173 line_number--;
174 ungetc (c, fp);
/* Remove backslash followed by newline from the input stream.  */

/* Pushback buffer of phase 1; it holds at most one character.  */
static int phase1_pushback[1];
/* Number of characters currently stored in phase1_pushback.  */
static int phase1_pushback_length;
183 static int
184 phase1_getc ()
186 int c;
188 if (phase1_pushback_length)
190 c = phase1_pushback[--phase1_pushback_length];
191 if (c == '\n')
192 ++line_number;
193 return c;
195 for (;;)
197 c = do_getc ();
198 if (c != '\\')
199 return c;
200 c = do_getc ();
201 if (c != '\n')
203 if (c != EOF)
204 do_ungetc (c);
205 return '\\';
210 /* Supports only one pushback character. */
211 static void
212 phase1_ungetc (int c)
214 switch (c)
216 case EOF:
217 break;
219 case '\n':
220 --line_number;
221 /* FALLTHROUGH */
223 default:
224 if (phase1_pushback_length == SIZEOF (phase1_pushback))
225 abort ();
226 phase1_pushback[phase1_pushback_length++] = c;
227 break;
/* ========================== Reading of tokens.  ========================== */


/* A token consists of a sequence of characters.
   The characters are not NUL terminated; string_of_token makes a
   terminated copy.  */
struct token
{
  int allocated;                /* number of allocated chars in 'chars' */
  int charcount;                /* number of used chars in 'chars' */
  char *chars;                  /* the token's constituents */
};
243 /* Initialize a 'struct token'. */
244 static inline void
245 init_token (struct token *tp)
247 tp->allocated = 10;
248 tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
249 tp->charcount = 0;
252 /* Free the memory pointed to by a 'struct token'. */
253 static inline void
254 free_token (struct token *tp)
256 free (tp->chars);
259 /* Ensure there is enough room in the token for one more character. */
260 static inline void
261 grow_token (struct token *tp)
263 if (tp->charcount == tp->allocated)
265 tp->allocated *= 2;
266 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
270 /* Convert a struct token * to a char*. */
271 static char *
272 string_of_token (const struct token *tp)
274 char *str;
275 int n;
277 n = tp->charcount;
278 str = (char *) xmalloc (n + 1);
279 memcpy (str, tp->chars, n);
280 str[n] = '\0';
281 return str;
/* ========================= Accumulating messages ========================= */


/* The message list that extracted messages are added to; set by
   extract_sh for the file being processed.  */
static message_list_ty *mlp;
/* ========================= Accumulating comments ========================= */


/* Growable buffer holding the text of the comment line being read.  */
static char *buffer;
/* Allocated size of 'buffer'.  */
static size_t bufmax;
/* Number of characters currently stored in 'buffer'.  */
static size_t buflen;
298 static inline void
299 comment_start ()
301 buflen = 0;
304 static inline void
305 comment_add (int c)
307 if (buflen >= bufmax)
309 bufmax = 2 * bufmax + 10;
310 buffer = xrealloc (buffer, bufmax);
312 buffer[buflen++] = c;
315 static inline void
316 comment_line_end ()
318 while (buflen >= 1
319 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
320 --buflen;
321 if (buflen >= bufmax)
323 bufmax = 2 * bufmax + 10;
324 buffer = xrealloc (buffer, bufmax);
326 buffer[buflen] = '\0';
327 xgettext_comment_add (buffer);
/* These are for tracking whether comments count as immediately before
   keyword.  */
/* Line number at which the most recent comment started.  */
static int last_comment_line;
/* Line number at which the most recent non-comment word ended.  */
static int last_non_comment_line;
/* ========================= Debackslashification ========================== */

/* This state tracks the effect of backquotes, double-quotes and single-quotes
   on the parsing of backslashes.  We make a single pass through the input
   file, keeping the state up to date.  This is much faster than accumulating
   strings and processing them with explicit debackslashification, like the
   shell does it.  */

/* The number of nested `...` or "`...`" constructs.  Assumed to be <= 32.  */
static unsigned int nested_backquotes;

/* A bit mask indicating which of the currently open `...` or "`...`"
   constructs is with double-quotes: "`...`".
   A bit value of 1 stands for "`...`", a bit value of 0 stands for `...`.
   Bit position 0 designates the outermost backquotes nesting,
   bit position 1 the second-outermost backquotes nesting,
   ...
   bit position (nested_backquotes-1) the innermost backquotes nesting.  */
static unsigned int open_doublequotes_mask;

/* A bit indicating whether a double-quote is currently open inside the
   innermost backquotes nesting.  */
static bool open_doublequote;

/* A bit indicating whether a single-quote is currently open inside the
   innermost backquotes nesting.  */
static bool open_singlequote;
366 /* Functions to update the state. */
368 static inline void
369 saw_opening_backquote ()
371 if (open_singlequote)
372 abort ();
373 if (open_doublequote)
374 open_doublequotes_mask |= (unsigned int) 1 << nested_backquotes;
375 nested_backquotes++;
376 open_doublequote = false;
379 static inline void
380 saw_closing_backquote ()
382 nested_backquotes--;
383 open_doublequote = (open_doublequotes_mask >> nested_backquotes) & 1;
384 open_doublequotes_mask &= ((unsigned int) 1 << nested_backquotes) - 1;
385 open_singlequote = false; /* just for safety */
388 static inline void
389 saw_opening_doublequote ()
391 if (open_singlequote || open_doublequote)
392 abort ();
393 open_doublequote = true;
396 static inline void
397 saw_closing_doublequote ()
399 if (open_singlequote || !open_doublequote)
400 abort ();
401 open_doublequote = false;
404 static inline void
405 saw_opening_singlequote ()
407 if (open_doublequote || open_singlequote)
408 abort ();
409 open_singlequote = true;
412 static inline void
413 saw_closing_singlequote ()
415 if (open_doublequote || !open_singlequote)
416 abort ();
417 open_singlequote = false;
/* ========================== Reading of commands ========================== */

/* We are only interested in constant strings.  Other words need not to be
   represented precisely.  */
enum word_type
{
  t_string,     /* constant string */
  t_other,      /* other string */
  t_separator,  /* command separator: semicolon or newline */
  t_redirect,   /* redirection: one of < > >| << <<- >> <> <& >& */
  t_backquote,  /* closing '`' pseudo word */
  t_paren,      /* closing ')' pseudo word */
  t_eof         /* EOF marker */
};
/* A word as produced by read_word.  The 'token' and
   'line_number_at_start' members are only set for words of type
   t_string.  */
struct word
{
  enum word_type type;
  struct token *token;          /* for t_string */
  int line_number_at_start;     /* for t_string */
};
443 /* Free the memory pointed to by a 'struct word'. */
444 static inline void
445 free_word (struct word *wp)
447 if (wp->type == t_string)
449 free_token (wp->token);
450 free (wp->token);
454 /* Convert a t_string token to a char*. */
455 static char *
456 string_of_word (const struct word *wp)
458 char *str;
459 int n;
461 if (!(wp->type == t_string))
462 abort ();
463 n = wp->token->charcount;
464 str = (char *) xmalloc (n + 1);
465 memcpy (str, wp->token->chars, n);
466 str[n] = '\0';
467 return str;
/* Whitespace recognition: true for an unquoted space, tab or newline.  */

static inline bool
is_whitespace (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
      return true;
    default:
      return false;
    }
}
/* Operator character recognition: true for the unquoted characters that
   can start a shell operator.  */

static inline bool
is_operator_start (int c)
{
  switch (c)
    {
    case '|':
    case '&':
    case ';':
    case '<':
    case '>':
    case '(':
    case ')':
      return true;
    default:
      return false;
    }
}
/* Denotation of a quoted character.
   The distinction between quoted and unquoted character is important only for
   the special, whitespace and operator characters; it is irrelevant for
   alphanumeric characters, '\\' and many others.  */
#define QUOTED(c) (UCHAR_MAX + 1 + (c))
/* Values in the 'unsigned char' range are implicitly unquoted.  Among these,
   the following are important:
     '"'         opening or closing double quote
     '\''        opening or closing single quote
     '$'         the unknown result of a dollar expansion
     '`'         does not occur - replaced with OPENING_BACKQUOTE or
                 CLOSING_BACKQUOTE
 */
/* Both backquote codes lie above the QUOTED () range, so they cannot be
   confused with any plain or quoted character.  */
#define OPENING_BACKQUOTE (2 * (UCHAR_MAX + 1) + '`')
#define CLOSING_BACKQUOTE (3 * (UCHAR_MAX + 1) + '`')
/* Pushback buffer of phase 2; it holds at most two characters.  */
static int phase2_pushback[2];
/* Number of characters currently stored in phase2_pushback.  */
static int phase2_pushback_length;
508 /* Return the next character, with backslashes removed.
509 The result is QUOTED(c) for some unsigned char c, if the next character
510 is escaped sufficiently often to make it a regular constituent character,
511 or simply an 'unsigned char' if it has its special meaning (of special,
512 whitespace or operator charcter), or OPENING_BACKQUOTE, CLOSING_BACKQUOTE,
513 EOF.
514 It's the caller's responsibility to update the state. */
515 static int
516 phase2_getc ()
518 int c;
520 if (phase2_pushback_length)
522 c = phase2_pushback[--phase2_pushback_length];
523 if (c == '\n')
524 ++line_number;
525 return c;
528 c = phase1_getc ();
529 if (c == EOF)
530 return c;
531 if (c == '\'')
532 return (open_doublequote ? QUOTED (c) : c);
533 if (!open_singlequote)
535 if (c == '"' || c == '$')
536 return c;
537 if (c == '`')
538 return (nested_backquotes > 0 ? CLOSING_BACKQUOTE : OPENING_BACKQUOTE);
540 if (c == '\\')
542 /* Number of debackslahificication passes that are active at the
543 current point. */
544 unsigned int debackslahify =
545 nested_backquotes + (open_singlequote ? 0 : 1);
546 /* Normal number of backslashes that yield a single backslash in the
547 final output. */
548 unsigned int expected_count =
549 (unsigned int) 1 << debackslahify;
550 /* Number of backslashes found. */
551 unsigned int count;
553 for (count = 1; count < expected_count; count++)
555 c = phase1_getc ();
556 if (c != '\\')
557 break;
559 if (count == expected_count)
560 return '\\';
562 /* The count of backslashes is > 0 and < expected_count, therefore the
563 result depends on c, the first character after the backslashes.
564 Note: The formulas below don't necessarily have a logic; they were
565 empirically determined such that 1. the xgettext-30 test succeeds,
566 2. the behaviour for count == 0 would correspond to the one without
567 any baskslash. */
568 if (c == '\'')
570 if (!open_singlequote && count > (expected_count >> 1))
572 phase1_ungetc (c);
573 return '\\';
575 else
576 return (open_doublequote ? QUOTED (c) : c);
578 else if (c == '"')
580 /* Each debackslahificication pass converts \\ to \ and \" to ";
581 passes corresponding to `...` drop a lone " whereas passes
582 corresponding to "`...`" leave it alone. Therefore, the
583 minimum number of backslashes needed to get one double-quote
584 in the end is open_doublequotes_mask + 1. */
585 if (open_singlequote)
587 if (count > open_doublequotes_mask)
589 phase1_ungetc (c);
590 return '\\';
592 else
593 return QUOTED (c);
595 else
597 if (count > open_doublequotes_mask)
598 return QUOTED (c);
599 else
600 /* Some of the count values <= open_doublequotes_mask are
601 actually invalid here, but we assume a syntactically
602 correct input file anyway. */
603 return c;
606 else if (c == '`')
608 /* FIXME: This code looks fishy. */
609 if (count == expected_count - 1)
610 return c;
611 else
612 /* Some of the count values < expected_count - 1 are
613 actually invalid here, but we assume a syntactically
614 correct input file anyway. */
615 if (nested_backquotes > 0 && !open_singlequote
616 && count >= (expected_count >> 2))
617 return OPENING_BACKQUOTE;
618 else
619 return CLOSING_BACKQUOTE;
621 else if (c == '$')
623 if (open_singlequote)
624 return QUOTED (c);
625 if (count >= (expected_count >> 1))
626 return QUOTED (c);
627 else
628 return c;
630 else
632 /* When not followed by a quoting character or backslash or dollar,
633 a backslash survives a debackslahificication pass unmodified.
634 Therefore each debackslahificication pass performs a
635 count := (count + 1) >> 1
636 operation. Therefore the minimum number of backslashes needed
637 to get one backslash in the end is (expected_count >> 1) + 1. */
638 if (open_doublequote || open_singlequote)
640 if (count > 0)
642 phase1_ungetc (c);
643 return '\\';
645 else
646 return QUOTED (c);
648 else
650 if (count > (expected_count >> 1))
652 phase1_ungetc (c);
653 return '\\';
655 else if (count > 0)
656 return QUOTED (c);
657 else
658 return c;
663 return (open_singlequote || open_doublequote ? QUOTED (c) : c);
666 /* Supports 2 characters of pushback. */
667 static void
668 phase2_ungetc (int c)
670 switch (c)
672 case EOF:
673 break;
675 case '\n':
676 --line_number;
677 /* FALLTHROUGH */
679 default:
680 if (phase2_pushback_length == SIZEOF (phase2_pushback))
681 abort ();
682 phase2_pushback[phase2_pushback_length++] = c;
683 break;
/* Context lookup table.  Set by extract_sh; consulted by read_command to
   find the flag contexts of a function name.  */
static flag_context_list_table_ty *flag_context_list_table;


/* Forward declaration of local functions.
   Needed because read_word and read_command_list call each other.  */
static enum word_type read_command_list (int looking_for,
                                         flag_context_ty outer_context);
698 /* Read the next word.
699 'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
700 or '\0'. */
701 static void
702 read_word (struct word *wp, int looking_for, flag_context_ty context)
704 int c;
705 bool all_unquoted_digits;
709 c = phase2_getc ();
710 if (c == '#')
712 /* Skip a comment up to end of line. */
713 last_comment_line = line_number;
714 comment_start ();
715 for (;;)
717 c = phase1_getc ();
718 if (c == EOF || c == '\n')
719 break;
720 /* We skip all leading white space, but not EOLs. */
721 if (!(buflen == 0 && (c == ' ' || c == '\t')))
722 comment_add (c);
724 comment_line_end ();
726 if (c == '\n')
728 /* Comments assumed to be grouped with a message must immediately
729 precede it, with no non-whitespace token on a line between
730 both. */
731 if (last_non_comment_line > last_comment_line)
732 xgettext_comment_reset ();
733 wp->type = t_separator;
734 return;
737 while (is_whitespace (c));
739 if (c == EOF)
741 wp->type = t_eof;
742 return;
745 if (c == '<' || c == '>')
747 /* Recognize the redirection operators < > >| << <<- >> <> <& >& */
748 int c2 = phase2_getc ();
749 if ((c == '<' ? c2 == '<' : c2 == '|') || c2 == '>' || c2 == '&')
751 if (c == '<' && c2 == '<')
753 int c3 = phase2_getc ();
754 if (c3 != '-')
755 phase2_ungetc (c3);
758 else
759 phase2_ungetc (c2);
760 wp->type = t_redirect;
761 return;
764 if (looking_for == CLOSING_BACKQUOTE && c == CLOSING_BACKQUOTE)
766 saw_closing_backquote ();
767 wp->type = t_backquote;
768 last_non_comment_line = line_number;
769 return;
772 if (looking_for == ')' && c == ')')
774 wp->type = t_paren;
775 last_non_comment_line = line_number;
776 return;
779 if (is_operator_start (c))
781 wp->type = (c == ';' ? t_separator : t_other);
782 return;
785 wp->type = t_string;
786 wp->token = (struct token *) xmalloc (sizeof (struct token));
787 init_token (wp->token);
788 wp->line_number_at_start = line_number;
789 all_unquoted_digits = true;
791 for (;; c = phase2_getc ())
793 if (c == EOF)
794 break;
796 if (all_unquoted_digits && (c == '<' || c == '>'))
798 /* Recognize the redirection operators < > >| << <<- >> <> <& >&
799 prefixed with a nonempty sequence of unquoted digits. */
800 int c2 = phase2_getc ();
801 if ((c == '<' ? c2 == '<' : c2 == '|') || c2 == '>' || c2 == '&')
803 if (c == '<' && c2 == '<')
805 int c3 = phase2_getc ();
806 if (c3 != '-')
807 phase2_ungetc (c3);
810 else
811 phase2_ungetc (c2);
813 wp->type = t_redirect;
814 free_token (wp->token);
815 free (wp->token);
817 last_non_comment_line = line_number;
819 return;
822 all_unquoted_digits = all_unquoted_digits && (c >= '0' && c <= '9');
824 if (c == '$')
826 int c2 = phase2_getc ();
827 if (c2 == '(')
829 int c3 = phase2_getc ();
830 if (c3 == '(')
832 /* Arithmetic expression. Skip until the matching closing
833 parenthesis. */
834 unsigned int depth = 2;
838 c = phase2_getc ();
839 if (c == '(')
840 depth++;
841 else if (c == ')')
842 if (--depth == 0)
843 break;
845 while (c != EOF);
847 else
849 /* Command substitution. */
850 phase2_ungetc (c3);
851 read_command_list (')', context);
854 else if (c2 == '\'' && !open_singlequote)
856 /* Bash builtin for string with ANSI-C escape sequences. */
857 saw_opening_singlequote ();
858 for (;;)
860 c = phase2_getc ();
861 if (c == EOF)
862 break;
863 if (c == '\'')
865 saw_closing_singlequote ();
866 break;
868 if (c == '\\')
870 c = phase2_getc ();
871 switch (c)
873 default:
874 phase2_ungetc (c);
875 c = '\\';
876 break;
878 case '\\':
879 break;
880 case '\'':
881 /* Don't call saw_closing_singlequote () here. */
882 break;
884 case 'a':
885 c = '\a';
886 break;
887 case 'b':
888 c = '\b';
889 break;
890 case 'e':
891 c = 0x1b; /* ESC */
892 break;
893 case 'f':
894 c = '\f';
895 break;
896 case 'n':
897 c = '\n';
898 break;
899 case 'r':
900 c = '\r';
901 break;
902 case 't':
903 c = '\t';
904 break;
905 case 'v':
906 c = '\v';
907 break;
909 case 'x':
910 c = phase2_getc ();
911 if ((c >= '0' && c <= '9')
912 || (c >= 'A' && c <= 'F')
913 || (c >= 'a' && c <= 'f'))
915 int n;
917 if (c >= '0' && c <= '9')
918 n = c - '0';
919 else if (c >= 'A' && c <= 'F')
920 n = 10 + c - 'A';
921 else if (c >= 'a' && c <= 'f')
922 n = 10 + c - 'a';
923 else
924 abort ();
926 c = phase2_getc ();
927 if ((c >= '0' && c <= '9')
928 || (c >= 'A' && c <= 'F')
929 || (c >= 'a' && c <= 'f'))
931 if (c >= '0' && c <= '9')
932 n = n * 16 + c - '0';
933 else if (c >= 'A' && c <= 'F')
934 n = n * 16 + 10 + c - 'A';
935 else if (c >= 'a' && c <= 'f')
936 n = n * 16 + 10 + c - 'a';
937 else
938 abort ();
940 else
941 phase2_ungetc (c);
943 c = n;
945 else
947 phase2_ungetc (c);
948 phase2_ungetc ('x');
949 c = '\\';
951 break;
953 case '0': case '1': case '2': case '3':
954 case '4': case '5': case '6': case '7':
956 int n = c - '0';
958 c = phase2_getc ();
959 if (c >= '0' && c <= '7')
961 n = n * 8 + c - '0';
963 c = phase2_getc ();
964 if (c >= '0' && c <= '7')
965 n = n * 8 + c - '0';
966 else
967 phase2_ungetc (c);
969 else
970 phase2_ungetc (c);
972 c = n;
974 break;
977 if (wp->type == t_string)
979 grow_token (wp->token);
980 wp->token->chars[wp->token->charcount++] =
981 (unsigned char) c;
984 /* The result is a literal string. Don't change wp->type. */
985 continue;
987 else if (c2 == '"' && !open_doublequote)
989 /* Bash builtin for internationalized string. */
990 lex_pos_ty pos;
991 struct token string;
993 saw_opening_doublequote ();
994 pos.file_name = logical_file_name;
995 pos.line_number = line_number;
996 init_token (&string);
997 for (;;)
999 c = phase2_getc ();
1000 if (c == EOF)
1001 break;
1002 if (c == '"')
1004 saw_closing_doublequote ();
1005 break;
1007 grow_token (&string);
1008 string.chars[string.charcount++] = (unsigned char) c;
1010 remember_a_message (mlp, string_of_token (&string),
1011 context, &pos);
1012 free_token (&string);
1014 error_with_progname = false;
1015 error (0, 0, _("%s:%lu: warning: the syntax $\"...\" is deprecated due to security reasons; use eval_gettext instead"),
1016 pos.file_name, (unsigned long) pos.line_number);
1017 error_with_progname = true;
1019 /* The result at runtime is not constant. Therefore we
1020 change wp->type. */
1022 else
1023 phase2_ungetc (c2);
1024 wp->type = t_other;
1025 continue;
1028 if (c == '\'')
1030 if (!open_singlequote)
1032 /* Handle an opening single quote. */
1033 saw_opening_singlequote ();
1035 else
1037 /* Handle a closing single quote. */
1038 saw_closing_singlequote ();
1040 continue;
1043 if (c == '"')
1045 if (!open_doublequote)
1047 /* Handle an opening double quote. */
1048 saw_opening_doublequote ();
1050 else
1052 /* Handle a closing double quote. */
1053 saw_closing_doublequote ();
1055 continue;
1058 if (c == OPENING_BACKQUOTE)
1060 /* Handle an opening backquote. */
1061 saw_opening_backquote ();
1063 read_command_list (CLOSING_BACKQUOTE, context);
1065 wp->type = t_other;
1066 continue;
1068 if (c == CLOSING_BACKQUOTE)
1069 break;
1071 if (!open_singlequote && !open_doublequote
1072 && (is_whitespace (c) || is_operator_start (c)))
1073 break;
1075 if (wp->type == t_string)
1077 grow_token (wp->token);
1078 wp->token->chars[wp->token->charcount++] = (unsigned char) c;
1082 phase2_ungetc (c);
1084 if (wp->type != t_string)
1086 free_token (wp->token);
1087 free (wp->token);
1089 last_non_comment_line = line_number;
1093 /* Read the next command.
1094 'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
1095 or '\0'.
1096 Returns the type of the word that terminated the command. */
1097 static enum word_type
1098 read_command (int looking_for, flag_context_ty outer_context)
1100 /* Read the words that make up the command.
1101 Here we completely ignore field splitting at whitespace and wildcard
1102 expansions; i.e. we assume that the source is written in such a way that
1103 every word in the program determines exactly one word in the resulting
1104 command.
1105 But we do not require that the 'gettext'/'ngettext' command is the
1106 first in the command; this is because 1. we want to allow for prefixes
1107 like "$verbose" that may expand to nothing, and 2. it's a big effort
1108 to know where a command starts in a $(for ...) or $(case ...) compound
1109 command. */
1110 int arg = 0; /* Current argument number. */
1111 bool arg_of_redirect = false; /* True right after a redirection operator. */
1112 flag_context_list_iterator_ty context_iter;
1113 int argnum1 = -1; /* First string position. */
1114 int argnum2 = -1; /* Plural string position. */
1115 message_ty *plural_mp = NULL; /* Remember the msgid. */
1117 for (;;)
1119 struct word inner;
1120 flag_context_ty inner_context;
1122 if (arg == 0)
1123 inner_context = null_context;
1124 else
1125 inner_context =
1126 inherited_context (outer_context,
1127 flag_context_list_iterator_advance (
1128 &context_iter));
1130 read_word (&inner, looking_for, inner_context);
1132 /* Recognize end of command. */
1133 if (inner.type == t_separator
1134 || inner.type == t_backquote || inner.type == t_paren
1135 || inner.type == t_eof)
1136 return inner.type;
1138 if (extract_all)
1140 if (inner.type == t_string)
1142 lex_pos_ty pos;
1144 pos.file_name = logical_file_name;
1145 pos.line_number = inner.line_number_at_start;
1146 remember_a_message (mlp, string_of_word (&inner),
1147 inner_context, &pos);
1151 if (arg_of_redirect)
1153 /* Ignore arguments of redirection operators. */
1154 arg_of_redirect = false;
1156 else if (inner.type == t_redirect)
1158 /* Ignore this word and the following one. */
1159 arg_of_redirect = true;
1161 else
1163 if (argnum1 < 0 && argnum2 < 0)
1165 /* This is the function position. */
1166 arg = 0;
1167 if (inner.type == t_string)
1169 char *function_name = string_of_word (&inner);
1170 void *keyword_value;
1172 if (find_entry (&keywords,
1173 function_name, strlen (function_name),
1174 &keyword_value)
1175 == 0)
1177 argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
1178 argnum2 = (int) (long) keyword_value >> 10;
1181 context_iter =
1182 flag_context_list_iterator (
1183 flag_context_list_table_lookup (
1184 flag_context_list_table,
1185 function_name, strlen (function_name)));
1187 free (function_name);
1189 else
1190 context_iter = null_context_list_iterator;
1192 else
1194 /* These are the argument positions.
1195 Extract a string if we have reached the right
1196 argument position. */
1197 if (arg == argnum1)
1199 if (inner.type == t_string)
1201 lex_pos_ty pos;
1202 message_ty *mp;
1204 pos.file_name = logical_file_name;
1205 pos.line_number = inner.line_number_at_start;
1206 mp = remember_a_message (mlp, string_of_word (&inner),
1207 inner_context, &pos);
1208 if (argnum2 > 0)
1209 plural_mp = mp;
1212 else if (arg == argnum2)
1214 if (inner.type == t_string && plural_mp != NULL)
1216 lex_pos_ty pos;
1218 pos.file_name = logical_file_name;
1219 pos.line_number = inner.line_number_at_start;
1220 remember_a_message_plural (plural_mp, string_of_word (&inner),
1221 inner_context, &pos);
1225 if (arg >= argnum1 && arg >= argnum2)
1227 /* Stop looking for arguments of the last function_name. */
1228 /* FIXME: What about context_iter? */
1229 argnum1 = -1;
1230 argnum2 = -1;
1231 plural_mp = NULL;
1235 arg++;
1238 free_word (&inner);
1243 /* Read a list of commands.
1244 'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
1245 or '\0'.
1246 Returns the type of the word that terminated the command list. */
1247 static enum word_type
1248 read_command_list (int looking_for, flag_context_ty outer_context)
1250 for (;;)
1252 enum word_type terminator;
1254 terminator = read_command (looking_for, outer_context);
1255 if (terminator != t_separator)
1256 return terminator;
1261 void
1262 extract_sh (FILE *f,
1263 const char *real_filename, const char *logical_filename,
1264 flag_context_list_table_ty *flag_table,
1265 msgdomain_list_ty *mdlp)
1267 mlp = mdlp->item[0]->messages;
1269 fp = f;
1270 real_file_name = real_filename;
1271 logical_file_name = xstrdup (logical_filename);
1272 line_number = 1;
1274 last_comment_line = -1;
1275 last_non_comment_line = -1;
1277 nested_backquotes = 0;
1278 open_doublequotes_mask = 0;
1279 open_doublequote = false;
1280 open_singlequote = false;
1282 flag_context_list_table = flag_table;
1284 init_keywords ();
1286 /* Eat tokens until eof is seen. */
1287 read_command_list ('\0', null_context);
1289 fp = NULL;
1290 real_file_name = NULL;
1291 logical_file_name = NULL;
1292 line_number = 0;