1 /* xgettext Perl backend.
2 Copyright (C) 2002-2004 Free Software Foundation, Inc.
4 This file was written by Guido Flohr <guido@imperia.net>, 2002-2003.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
34 #include "error-progname.h"
37 #include "po-charset.h"
38 #include "ucs4-utf8.h"
43 #define _(s) gettext(s)
45 /* The Perl syntax is defined in perlsyn.pod. Try the command
46 "man perlsyn" or "perldoc perlsyn".
47 Also, the syntax after the 'sub' keyword is specified in perlsub.pod.
48 Try the command "man perlsub" or "perldoc perlsub". */
53 /* ====================== Keyword set customization. ====================== */
55 /* If true extract all strings. */
56 static bool extract_all
= false;
58 static hash_table keywords
;
59 static bool default_keywords
= true;
70 x_perl_keyword (const char *name
)
73 default_keywords
= false;
81 if (keywords
.table
== NULL
)
82 init_hash (&keywords
, 100);
84 split_keywordspec (name
, &end
, &argnum1
, &argnum2
);
86 /* The characters between name and end should form a valid C identifier.
87 A colon means an invalid parse in split_keywordspec(). */
88 colon
= strchr (name
, ':');
89 if (colon
== NULL
|| colon
>= end
)
93 insert_entry (&keywords
, name
, end
- name
,
94 (void *) (long) (argnum1
+ (argnum2
<< 10)));
99 /* Finish initializing the keywords hash table.
100 Called after argument processing, before each file is processed. */
104 if (default_keywords
)
106 x_perl_keyword ("gettext");
107 x_perl_keyword ("%gettext");
108 x_perl_keyword ("$gettext");
109 x_perl_keyword ("dgettext:2");
110 x_perl_keyword ("dcgettext:2");
111 x_perl_keyword ("ngettext:1,2");
112 x_perl_keyword ("dngettext:2,3");
113 x_perl_keyword ("dcngettext:2,3");
114 x_perl_keyword ("gettext_noop");
116 x_perl_keyword ("__");
117 x_perl_keyword ("$__");
118 x_perl_keyword ("%__");
119 x_perl_keyword ("__x");
120 x_perl_keyword ("__n:1,2");
121 x_perl_keyword ("__nx:1,2");
122 x_perl_keyword ("__xn:1,2");
123 x_perl_keyword ("N__");
125 default_keywords
= false;
130 init_flag_table_perl ()
132 xgettext_record_flag ("gettext:1:pass-perl-format");
133 xgettext_record_flag ("gettext:1:pass-perl-brace-format");
134 xgettext_record_flag ("%gettext:1:pass-perl-format");
135 xgettext_record_flag ("%gettext:1:pass-perl-brace-format");
136 xgettext_record_flag ("$gettext:1:pass-perl-format");
137 xgettext_record_flag ("$gettext:1:pass-perl-brace-format");
138 xgettext_record_flag ("dgettext:2:pass-perl-format");
139 xgettext_record_flag ("dgettext:2:pass-perl-brace-format");
140 xgettext_record_flag ("dcgettext:2:pass-perl-format");
141 xgettext_record_flag ("dcgettext:2:pass-perl-brace-format");
142 xgettext_record_flag ("ngettext:1:pass-perl-format");
143 xgettext_record_flag ("ngettext:2:pass-perl-format");
144 xgettext_record_flag ("ngettext:1:pass-perl-brace-format");
145 xgettext_record_flag ("ngettext:2:pass-perl-brace-format");
146 xgettext_record_flag ("dngettext:2:pass-perl-format");
147 xgettext_record_flag ("dngettext:3:pass-perl-format");
148 xgettext_record_flag ("dngettext:2:pass-perl-brace-format");
149 xgettext_record_flag ("dngettext:3:pass-perl-brace-format");
150 xgettext_record_flag ("dcngettext:2:pass-perl-format");
151 xgettext_record_flag ("dcngettext:3:pass-perl-format");
152 xgettext_record_flag ("dcngettext:2:pass-perl-brace-format");
153 xgettext_record_flag ("dcngettext:3:pass-perl-brace-format");
154 xgettext_record_flag ("gettext_noop:1:pass-perl-format");
155 xgettext_record_flag ("gettext_noop:1:pass-perl-brace-format");
156 xgettext_record_flag ("printf:1:perl-format"); /* argument 1 or 2 ?? */
157 xgettext_record_flag ("sprintf:1:perl-format");
159 xgettext_record_flag ("__:1:pass-perl-format");
160 xgettext_record_flag ("__:1:pass-perl-brace-format");
161 xgettext_record_flag ("%__:1:pass-perl-format");
162 xgettext_record_flag ("%__:1:pass-perl-brace-format");
163 xgettext_record_flag ("$__:1:pass-perl-format");
164 xgettext_record_flag ("$__:1:pass-perl-brace-format");
165 xgettext_record_flag ("__x:1:perl-brace-format");
166 xgettext_record_flag ("__n:1:pass-perl-format");
167 xgettext_record_flag ("__n:2:pass-perl-format");
168 xgettext_record_flag ("__n:1:pass-perl-brace-format");
169 xgettext_record_flag ("__n:2:pass-perl-brace-format");
170 xgettext_record_flag ("__nx:1:perl-brace-format");
171 xgettext_record_flag ("__nx:2:perl-brace-format");
172 xgettext_record_flag ("__xn:1:perl-brace-format");
173 xgettext_record_flag ("__xn:2:perl-brace-format");
174 xgettext_record_flag ("N__:1:pass-perl-format");
175 xgettext_record_flag ("N__:1:pass-perl-brace-format");
180 /* ======================== Reading of characters. ======================== */
182 /* Real filename, used in error messages about the input file. */
183 static const char *real_file_name
;
185 /* Logical filename and line number, used to label the extracted messages. */
186 static char *logical_file_name
;
187 static int line_number
;
189 /* The input file stream. */
192 /* The current line buffer. */
193 static char *linebuf
;
195 /* The size of the current line. */
198 /* The position in the current line. */
201 /* The size of the input buffer. */
202 static size_t linebuf_size
;
204 /* Number of lines eaten for here documents. */
205 static int here_eaten
;
207 /* Paranoia: EOF marker for __END__ or __DATA__. */
208 static bool end_of_file
;
211 /* 1. line_number handling. */
213 /* Returns the next character from the input stream or EOF. */
217 line_number
+= here_eaten
;
223 if (linepos
>= linesize
)
225 linesize
= getline (&linebuf
, &linebuf_size
, fp
);
230 error (EXIT_FAILURE
, errno
, _("error while reading \"%s\""),
239 /* Undosify. This is important for catching the end of <<EOF and
240 <<'EOF'. We could rely on stdio doing this for us but you
241 it is not uncommon to to come across Perl scripts with CRLF
242 newline conventions on systems that do not follow this
244 if (linesize
>= 2 && linebuf
[linesize
- 1] == '\n'
245 && linebuf
[linesize
- 2] == '\r')
247 linebuf
[linesize
- 2] = '\n';
248 linebuf
[linesize
- 1] = '\0';
253 return linebuf
[linepos
++];
256 /* Supports only one pushback character. */
258 phase1_ungetc (int c
)
263 /* Attempt to ungetc across line boundary. Shouldn't happen.
264 No two phase1_ungetc calls are permitted in a row. */
271 /* Read a here document and return its contents.
272 The delimiter is an UTF-8 encoded string; the resulting string is UTF-8
276 get_here_document (const char *delimiter
)
278 /* Accumulator for the entire here document, including a NUL byte
281 static size_t bufmax
= 0;
283 /* Current line being appended. */
284 static char *my_linebuf
= NULL
;
285 static size_t my_linebuf_size
= 0;
287 /* Allocate the initial buffer. Later on, bufmax > 0. */
290 buffer
= xrealloc (NULL
, 1);
297 int read_bytes
= getline (&my_linebuf
, &my_linebuf_size
, fp
);
305 error (EXIT_FAILURE
, errno
, _("error while reading \"%s\""),
310 error_with_progname
= false;
311 error (EXIT_SUCCESS
, 0, _("\
312 %s:%d: can't find string terminator \"%s\" anywhere before EOF"),
313 real_file_name
, line_number
, delimiter
);
314 error_with_progname
= true;
322 /* Convert to UTF-8. */
324 from_current_source_encoding (my_linebuf
, logical_file_name
,
325 line_number
+ here_eaten
);
326 if (my_line_utf8
!= my_linebuf
)
328 if (strlen (my_line_utf8
) >= my_linebuf_size
)
330 my_linebuf_size
= strlen (my_line_utf8
) + 1;
331 my_linebuf
= xrealloc (my_linebuf
, my_linebuf_size
);
333 strcpy (my_linebuf
, my_line_utf8
);
337 /* Undosify. This is important for catching the end of <<EOF and
338 <<'EOF'. We could rely on stdio doing this for us but you
339 it is not uncommon to to come across Perl scripts with CRLF
340 newline conventions on systems that do not follow this
342 if (read_bytes
>= 2 && my_linebuf
[read_bytes
- 1] == '\n'
343 && my_linebuf
[read_bytes
- 2] == '\r')
345 my_linebuf
[read_bytes
- 2] = '\n';
346 my_linebuf
[read_bytes
- 1] = '\0';
350 /* Temporarily remove the trailing newline from my_linebuf. */
352 if (read_bytes
>= 1 && my_linebuf
[read_bytes
- 1] == '\n')
355 my_linebuf
[read_bytes
- 1] = '\0';
358 /* See whether this line terminates the here document. */
359 if (strcmp (my_linebuf
, delimiter
) == 0)
362 /* Add back the trailing newline to my_linebuf. */
364 my_linebuf
[read_bytes
- 1] = '\n';
366 /* Ensure room for read_bytes + 1 bytes. */
367 if (bufpos
+ read_bytes
>= bufmax
)
370 bufmax
= 2 * bufmax
+ 10;
371 while (bufpos
+ read_bytes
>= bufmax
);
372 buffer
= xrealloc (buffer
, bufmax
);
374 /* Append this line to the accumulator. */
375 strcpy (buffer
+ bufpos
, my_linebuf
);
376 bufpos
+= read_bytes
;
379 /* Done accumulating the here document. */
380 return xstrdup (buffer
);
383 /* Skips pod sections. */
387 line_number
+= here_eaten
;
393 linesize
= getline (&linebuf
, &linebuf_size
, fp
);
398 error (EXIT_FAILURE
, errno
, _("error while reading \"%s\""),
405 if (strncmp ("=cut", linebuf
, 4) == 0)
407 /* Force reading of a new line on next call to phase1_getc(). */
415 /* These are for tracking whether comments count as immediately before
417 static int last_comment_line
;
418 static int last_non_comment_line
;
421 /* 2. Replace each comment that is not inside a string literal or regular
422 expression with a newline character. We need to remember the comment
423 for later, because it may be attached to a keyword string. */
429 static size_t bufmax
;
439 lineno
= line_number
;
440 /* Skip leading whitespace. */
446 if (c
!= ' ' && c
!= '\t' && c
!= '\r' && c
!= '\f')
452 /* Accumulate the comment. */
456 if (c
== '\n' || c
== EOF
)
458 if (buflen
>= bufmax
)
460 bufmax
= 2 * bufmax
+ 10;
461 buffer
= xrealloc (buffer
, bufmax
);
463 buffer
[buflen
++] = c
;
465 if (buflen
>= bufmax
)
467 bufmax
= 2 * bufmax
+ 10;
468 buffer
= xrealloc (buffer
, bufmax
);
470 buffer
[buflen
] = '\0';
471 /* Convert it to UTF-8. */
473 from_current_source_encoding (buffer
, logical_file_name
, lineno
);
474 /* Save it until we encounter the corresponding string. */
475 xgettext_current_source_encoding
= po_charset_utf8
;
476 xgettext_comment_add (utf8_string
);
477 xgettext_current_source_encoding
= xgettext_global_source_encoding
;
478 last_comment_line
= lineno
;
483 /* Supports only one pushback character. */
485 phase2_ungetc (int c
)
491 /* Whitespace recognition. */
493 #define case_whitespace \
494 case ' ': case '\t': case '\r': case '\n': case '\f'
497 is_whitespace (int c
)
499 return (c
== ' ' || c
== '\t' || c
== '\r' || c
== '\n' || c
== '\f');
503 /* ========================== Reading of tokens. ========================== */
509 token_type_lparen
, /* ( */
510 token_type_rparen
, /* ) */
511 token_type_comma
, /* , */
512 token_type_fat_comma
, /* => */
513 token_type_dereference
, /* , */
514 token_type_semicolon
, /* ; */
515 token_type_lbrace
, /* { */
516 token_type_rbrace
, /* } */
517 token_type_lbracket
, /* [ */
518 token_type_rbracket
, /* ] */
519 token_type_string
, /* quote-like */
520 token_type_named_op
, /* if, unless, while, ... */
521 token_type_variable
, /* $... */
522 token_type_symbol
, /* symbol, number */
523 token_type_regex_op
, /* s, tr, y, m. */
524 token_type_dot
, /* . */
525 token_type_other
, /* regexp, misc. operator */
526 /* The following are not really token types, but variants used by
528 token_type_keyword_symbol
/* keyword symbol */
530 typedef enum token_type_ty token_type_ty
;
532 /* Subtypes for strings, important for interpolation. */
535 string_type_verbatim
, /* "<<'EOF'", "m'...'", "s'...''...'",
536 "tr/.../.../", "y/.../.../". */
537 string_type_q
, /* "'..'", "q/.../". */
538 string_type_qq
, /* '"..."', "`...`", "qq/.../", "qx/.../",
540 string_type_qr
/* Not supported. */
543 /* Subtypes for symbols, important for dollar interpretation. */
546 symbol_type_none
, /* Nothing special. */
547 symbol_type_sub
, /* 'sub'. */
548 symbol_type_function
/* Function name after 'sub'. */
551 typedef struct token_ty token_ty
;
555 int sub_type
; /* for token_type_string, token_type_symbol */
556 char *string
; /* for: in encoding:
557 token_type_named_op ASCII
558 token_type_string UTF-8
559 token_type_symbol ASCII
560 token_type_variable global_source_encoding
567 token2string (const token_ty
*token
)
572 return "token_type_eof";
573 case token_type_lparen
:
574 return "token_type_lparen";
575 case token_type_rparen
:
576 return "token_type_rparen";
577 case token_type_comma
:
578 return "token_type_comma";
579 case token_type_fat_comma
:
580 return "token_type_fat_comma";
581 case token_type_dereference
:
582 return "token_type_dereference";
583 case token_type_semicolon
:
584 return "token_type_semicolon";
585 case token_type_lbrace
:
586 return "token_type_lbrace";
587 case token_type_rbrace
:
588 return "token_type_rbrace";
589 case token_type_lbracket
:
590 return "token_type_lbracket";
591 case token_type_rbracket
:
592 return "token_type_rbracket";
593 case token_type_string
:
594 return "token_type_string";
595 case token_type_named_op
:
596 return "token_type_named_op";
597 case token_type_variable
:
598 return "token_type_variable";
599 case token_type_symbol
:
600 return "token_type_symbol";
601 case token_type_regex_op
:
602 return "token_type_regex_op";
604 return "token_type_dot";
605 case token_type_other
:
606 return "token_type_other";
613 /* Free the memory pointed to by a 'struct token_ty'. */
615 free_token (token_ty
*tp
)
619 case token_type_named_op
:
620 case token_type_string
:
621 case token_type_symbol
:
622 case token_type_variable
:
631 /* Pass 1 of extracting quotes: Find the end of the string, regardless
632 of the semantics of the construct. Return the complete string,
633 including the starting and the trailing delimiter, with backslashes
634 removed where appropriate. */
636 extract_quotelike_pass1 (int delim
)
638 /* This function is called recursively. No way to allocate stuff
639 statically. Also alloca() is inappropriate due to limited stack
640 size on some platforms. So we use malloc(). */
642 char *buffer
= (char *) xmalloc (bufmax
);
647 buffer
[bufpos
++] = delim
;
649 /* Find the closing delimiter. */
664 default: /* "..." or '...' or |...| etc. */
666 counter_delim
= delim
;
672 int c
= phase1_getc ();
674 /* This round can produce 1 or 2 bytes. Ensure room for 2 bytes. */
675 if (bufpos
+ 2 > bufmax
)
677 bufmax
= 2 * bufmax
+ 10;
678 buffer
= xrealloc (buffer
, bufmax
);
681 if (c
== counter_delim
|| c
== EOF
)
683 buffer
[bufpos
++] = counter_delim
; /* will be stripped off later */
684 buffer
[bufpos
++] = '\0';
686 fprintf (stderr
, "PASS1: %s\n", buffer
);
691 if (nested
&& c
== delim
)
693 char *inner
= extract_quotelike_pass1 (delim
);
694 size_t len
= strlen (inner
);
696 /* Ensure room for len + 1 bytes. */
697 if (bufpos
+ len
>= bufmax
)
700 bufmax
= 2 * bufmax
+ 10;
701 while (bufpos
+ len
>= bufmax
);
702 buffer
= xrealloc (buffer
, bufmax
);
704 strcpy (buffer
+ bufpos
, inner
);
713 buffer
[bufpos
++] = '\\';
714 buffer
[bufpos
++] = '\\';
716 else if (c
== delim
|| c
== counter_delim
)
718 /* This is pass2 in Perl. */
719 buffer
[bufpos
++] = c
;
723 buffer
[bufpos
++] = '\\';
729 buffer
[bufpos
++] = c
;
734 /* Like extract_quotelike_pass1, but return the complete string in UTF-8
737 extract_quotelike_pass1_utf8 (int delim
)
739 char *string
= extract_quotelike_pass1 (delim
);
741 from_current_source_encoding (string
, logical_file_name
, line_number
);
742 if (utf8_string
!= string
)
748 /* ========= Reading of tokens and commands. Extracting strings. ========= */
751 /* There is an ambiguity about '/': It can start a division operator ('/' or
752 '/=') or it can start a regular expression. The distinction is important
753 because inside regular expressions, '#' loses its special meaning.
754 The distinction is possible depending on the parsing state: After a
755 variable or simple expression, it's a division operator; at the beginning
756 of an expression, it's a regexp. */
757 static bool prefer_division_over_regexp
;
759 /* Context lookup table. */
760 static flag_context_list_table_ty
*flag_context_list_table
;
763 /* Forward declaration of local functions. */
764 static void interpolate_keywords (message_list_ty
*mlp
, const char *string
,
766 static token_ty
*x_perl_lex (message_list_ty
*mlp
);
767 static void x_perl_unlex (token_ty
*tp
);
768 static bool extract_balanced (message_list_ty
*mlp
, int state
,
770 flag_context_ty outer_context
,
771 flag_context_list_iterator_ty context_iter
,
772 int arg_sg
, int arg_pl
);
775 /* Extract an unsigned hexadecimal number from STRING, considering at
776 most LEN bytes and place the result in *RESULT. Returns a pointer
777 to the first character past the hexadecimal number. */
779 extract_hex (const char *string
, size_t len
, unsigned int *result
)
785 for (i
= 0; i
< len
; i
++)
790 if (c
>= 'A' && c
<= 'F')
791 number
= c
- 'A' + 10;
792 else if (c
>= 'a' && c
<= 'f')
793 number
= c
- 'a' + 10;
794 else if (c
>= '0' && c
<= '9')
806 /* Extract an unsigned octal number from STRING, considering at
807 most LEN bytes and place the result in *RESULT. Returns a pointer
808 to the first character past the octal number. */
810 extract_oct (const char *string
, size_t len
, unsigned int *result
)
816 for (i
= 0; i
< len
; i
++)
821 if (c
>= '0' && c
<= '7')
833 /* Extract the various quotelike constructs except for <<EOF. See the
834 section "Gory details of parsing quoted constructs" in perlop.pod.
835 Return the resulting token in *tp; tp->type == token_type_string. */
837 extract_quotelike (token_ty
*tp
, int delim
)
839 char *string
= extract_quotelike_pass1_utf8 (delim
);
840 size_t len
= strlen (string
);
842 tp
->type
= token_type_string
;
843 /* Take the string without the delimiters at the start and at the end. */
846 string
[len
- 1] = '\0';
847 tp
->string
= xstrdup (string
+ 1);
851 /* Extract the quotelike constructs with double delimiters, like
852 s/[SEARCH]/[REPLACE]/. This function does not eat up trailing
853 modifiers (left to the caller).
854 Return the resulting token in *tp; tp->type == token_type_regex_op. */
856 extract_triple_quotelike (message_list_ty
*mlp
, token_ty
*tp
, int delim
,
861 tp
->type
= token_type_regex_op
;
863 string
= extract_quotelike_pass1_utf8 (delim
);
865 interpolate_keywords (mlp
, string
, line_number
);
868 if (delim
== '(' || delim
== '<' || delim
== '{' || delim
== '[')
870 /* The delimiter for the second string can be different, e.g.
871 s{SEARCH}{REPLACE} or s{SEARCH}/REPLACE/. See "man perlrequick". */
872 delim
= phase1_getc ();
873 while (is_whitespace (delim
))
875 /* The hash-sign is not a valid delimiter after whitespace, ergo
876 use phase2_getc() and not phase1_getc() now. */
877 delim
= phase2_getc ();
880 string
= extract_quotelike_pass1_utf8 (delim
);
882 interpolate_keywords (mlp
, string
, line_number
);
886 /* Perform pass 3 of quotelike extraction (interpolation).
887 *tp is a token of type token_type_string.
888 This function replaces tp->string. */
889 /* FIXME: Currently may writes null-bytes into the string. */
891 extract_quotelike_pass3 (token_ty
*tp
, int error_level
)
894 static int bufmax
= 0;
902 switch (tp
->sub_type
)
904 case string_type_verbatim
:
905 fprintf (stderr
, "Interpolating string_type_verbatim:\n");
908 fprintf (stderr
, "Interpolating string_type_q:\n");
911 fprintf (stderr
, "Interpolating string_type_qq:\n");
914 fprintf (stderr
, "Interpolating string_type_qr:\n");
917 fprintf (stderr
, "%s\n", tp
->string
);
918 if (tp
->sub_type
== string_type_verbatim
)
919 fprintf (stderr
, "---> %s\n", tp
->string
);
922 if (tp
->sub_type
== string_type_verbatim
)
925 /* Loop over tp->string, accumulating the expansion in buffer. */
934 /* Ensure room for 7 bytes, 6 (multi-)bytes plus a leading backslash
935 if \Q modifier is present. */
936 if (bufpos
+ 7 > bufmax
)
938 bufmax
= 2 * bufmax
+ 10;
939 buffer
= xrealloc (buffer
, bufmax
);
942 if (tp
->sub_type
== string_type_q
)
950 buffer
[bufpos
++] = '\\';
955 buffer
[bufpos
++] = *crs
++;
961 /* We only get here for double-quoted strings or regular expressions.
962 Unescape escape sequences. */
969 buffer
[bufpos
++] = '\t';
973 buffer
[bufpos
++] = '\n';
977 buffer
[bufpos
++] = '\r';
981 buffer
[bufpos
++] = '\f';
985 buffer
[bufpos
++] = '\b';
989 buffer
[bufpos
++] = '\a';
993 buffer
[bufpos
++] = 0x1b;
995 case '0': case '1': case '2': case '3':
996 case '4': case '5': case '6': case '7':
998 unsigned int oct_number
;
1001 crs
= extract_oct (crs
+ 1, 3, &oct_number
);
1003 /* FIXME: If one of the variables UPPERCASE or LOWERCASE is
1004 true, the character should be converted to its uppercase
1005 resp. lowercase equivalent. I don't know if the necessary
1006 facilities are already included in gettext. For US-Ascii
1007 the conversion can be already be done, however. */
1008 if (uppercase
&& oct_number
>= 'a' && oct_number
<= 'z')
1010 oct_number
= oct_number
- 'a' + 'A';
1012 else if (lowercase
&& oct_number
>= 'A' && oct_number
<= 'Z')
1014 oct_number
= oct_number
- 'A' + 'a';
1018 /* Yes, octal escape sequences in the range 0x100..0x1ff are
1020 length
= u8_uctomb ((unsigned char *) (buffer
+ bufpos
),
1028 unsigned int hex_number
= 0;
1034 const char *end
= strchr (crs
, '}');
1037 error_with_progname
= false;
1038 error (error_level
, 0, _("\
1039 %s:%d: missing right brace on \\x{HEXNUMBER}"), real_file_name
, line_number
);
1040 error_with_progname
= true;
1047 (void) extract_hex (crs
, end
- crs
, &hex_number
);
1053 crs
= extract_hex (crs
, 2, &hex_number
);
1056 /* FIXME: If one of the variables UPPERCASE or LOWERCASE is
1057 true, the character should be converted to its uppercase
1058 resp. lowercase equivalent. I don't know if the necessary
1059 facilities are already included in gettext. For US-Ascii
1060 the conversion can be already be done, however. */
1061 if (uppercase
&& hex_number
>= 'a' && hex_number
<= 'z')
1063 hex_number
= hex_number
- 'a' + 'A';
1065 else if (lowercase
&& hex_number
>= 'A' && hex_number
<= 'Z')
1067 hex_number
= hex_number
- 'A' + 'a';
1070 length
= u8_uctomb ((unsigned char *) (buffer
+ bufpos
),
1078 /* Perl's notion of control characters. */
1082 int the_char
= (unsigned char) *crs
;
1083 if (the_char
>= 'a' || the_char
<= 'z')
1084 the_char
= the_char
- 'a' + 'A';
1085 buffer
[bufpos
++] = the_char
^ 0x40;
1092 const char *end
= strchr (crs
+ 1, '}');
1096 unsigned int unicode
;
1098 name
= (char *) xmalloc (end
- (crs
+ 1) + 1);
1099 memcpy (name
, crs
+ 1, end
- (crs
+ 1));
1100 name
[end
- (crs
+ 1)] = '\0';
1102 unicode
= unicode_name_character (name
);
1103 if (unicode
!= UNINAME_INVALID
)
1105 /* FIXME: Convert to upper/lowercase if the
1106 corresponding flag is set to true. */
1108 u8_uctomb ((unsigned char *) (buffer
+ bufpos
),
1123 /* No escape sequence, go on. */
1151 if (*crs
>= 'A' && *crs
<= 'Z')
1153 buffer
[bufpos
++] = *crs
- 'A' + 'a';
1155 else if ((unsigned char) *crs
>= 0x80)
1157 error_with_progname
= false;
1158 error (error_level
, 0, _("\
1159 %s:%d: invalid interpolation (\"\\l\") of 8bit character \"%c\""),
1160 real_file_name
, line_number
, *crs
);
1161 error_with_progname
= true;
1165 buffer
[bufpos
++] = *crs
;
1171 if (*crs
>= 'a' && *crs
<= 'z')
1173 buffer
[bufpos
++] = *crs
- 'a' + 'A';
1175 else if ((unsigned char) *crs
>= 0x80)
1177 error_with_progname
= false;
1178 error (error_level
, 0, _("\
1179 %s:%d: invalid interpolation (\"\\u\") of 8bit character \"%c\""),
1180 real_file_name
, line_number
, *crs
);
1181 error_with_progname
= true;
1185 buffer
[bufpos
++] = *crs
;
1190 buffer
[bufpos
++] = *crs
;
1199 backslashed
= false;
1202 && !((*crs
>= 'A' && *crs
<= 'Z') || (*crs
>= 'A' && *crs
<= 'z')
1203 || (*crs
>= '0' && *crs
<= '9') || *crs
== '_'))
1205 buffer
[bufpos
++] = '\\';
1209 if (!backslashed
&& !extract_all
&& (*crs
== '$' || *crs
== '@'))
1211 error_with_progname
= false;
1212 error (error_level
, 0, _("\
1213 %s:%d: invalid variable interpolation at \"%c\""),
1214 real_file_name
, line_number
, *crs
);
1215 error_with_progname
= true;
1220 if (*crs
>= 'A' && *crs
<= 'Z')
1221 buffer
[bufpos
++] = *crs
- 'A' + 'a';
1222 else if ((unsigned char) *crs
>= 0x80)
1224 error_with_progname
= false;
1225 error (error_level
, 0, _("\
1226 %s:%d: invalid interpolation (\"\\L\") of 8bit character \"%c\""),
1227 real_file_name
, line_number
, *crs
);
1228 error_with_progname
= true;
1229 buffer
[bufpos
++] = *crs
;
1232 buffer
[bufpos
++] = *crs
;
1237 if (*crs
>= 'a' && *crs
<= 'z')
1238 buffer
[bufpos
++] = *crs
- 'a' + 'A';
1239 else if ((unsigned char) *crs
>= 0x80)
1241 error_with_progname
= false;
1242 error (error_level
, 0, _("\
1243 %s:%d: invalid interpolation (\"\\U\") of 8bit character \"%c\""),
1244 real_file_name
, line_number
, *crs
);
1245 error_with_progname
= true;
1246 buffer
[bufpos
++] = *crs
;
1249 buffer
[bufpos
++] = *crs
;
1254 buffer
[bufpos
++] = *crs
++;
1258 /* Ensure room for 1 more byte. */
1259 if (bufpos
>= bufmax
)
1261 bufmax
= 2 * bufmax
+ 10;
1262 buffer
= xrealloc (buffer
, bufmax
);
1265 buffer
[bufpos
++] = '\0';
1268 fprintf (stderr
, "---> %s\n", buffer
);
1271 /* Replace tp->string. */
1273 tp
->string
= xstrdup (buffer
);
1276 /* Parse a variable. This is done in several steps:
1277 1) Consume all leading occurencies of '$', '@', '%', and '*'.
1278 2) Determine the name of the variable from the following input.
1279 3) Parse possible following hash keys or array indexes.
1282 extract_variable (message_list_ty
*mlp
, token_ty
*tp
, int first
)
1284 static char *buffer
;
1285 static int bufmax
= 0;
1288 size_t varbody_length
= 0;
1289 bool maybe_hash_deref
= false;
1290 bool maybe_hash_value
= false;
1292 tp
->type
= token_type_variable
;
1295 fprintf (stderr
, "%s:%d: extracting variable type '%c'\n",
1296 real_file_name
, line_number
, first
);
1300 * 1) Consume dollars and so on (not euros ...). Unconditionally
1301 * accepting the hash sign (#) will maybe lead to inaccurate
1304 while (c
== '$' || c
== '*' || c
== '#' || c
== '@' || c
== '%')
1306 if (bufpos
>= bufmax
)
1308 bufmax
= 2 * bufmax
+ 10;
1309 buffer
= xrealloc (buffer
, bufmax
);
1311 buffer
[bufpos
++] = c
;
1317 tp
->type
= token_type_eof
;
1321 /* Hash references are treated in a special way, when looking for
1323 if (buffer
[0] == '$')
1326 maybe_hash_value
= true;
1327 else if (bufpos
== 2 && buffer
[1] == '$')
1330 || (c
>= 'A' && c
<= 'Z') || (c
>= 'a' && c
<= 'z')
1331 || (c
>= '0' && c
<= '9')
1332 || c
== '_' || c
== ':' || c
== '\'' || c
>= 0x80))
1334 /* Special variable $$ for pid. */
1335 if (bufpos
>= bufmax
)
1337 bufmax
= 2 * bufmax
+ 10;
1338 buffer
= xrealloc (buffer
, bufmax
);
1340 buffer
[bufpos
++] = '\0';
1341 tp
->string
= xstrdup (buffer
);
1343 fprintf (stderr
, "%s:%d: is PID ($$)\n",
1344 real_file_name
, line_number
);
1351 maybe_hash_deref
= true;
1357 * 2) Get the name of the variable. The first character is practically
1358 * arbitrary. Punctuation and numbers automagically put a variable
1359 * in the global namespace but that subtle difference is not interesting
1362 if (bufpos
>= bufmax
)
1364 bufmax
= 2 * bufmax
+ 10;
1365 buffer
= xrealloc (buffer
, bufmax
);
1369 /* Yuck, we cannot accept ${gettext} as a keyword... Except for
1370 * debugging purposes it is also harmless, that we suppress the
1371 * real name of the variable.
1374 fprintf (stderr
, "%s:%d: braced {variable_name}\n",
1375 real_file_name
, line_number
);
1378 if (extract_balanced (mlp
, 0, token_type_rbrace
,
1379 null_context
, null_context_list_iterator
, -1, -1))
1381 buffer
[bufpos
++] = c
;
1385 while ((c
>= 'A' && c
<= 'Z') || (c
>= 'a' && c
<= 'z')
1386 || (c
>= '0' && c
<= '9')
1387 || c
== '_' || c
== ':' || c
== '\'' || c
>= 0x80)
1390 if (bufpos
>= bufmax
)
1392 bufmax
= 2 * bufmax
+ 10;
1393 buffer
= xrealloc (buffer
, bufmax
);
1395 buffer
[bufpos
++] = c
;
1401 /* Probably some strange Perl variable like $`. */
1402 if (varbody_length
== 0)
1405 if (c
== EOF
|| is_whitespace (c
))
1406 phase1_ungetc (c
); /* Loser. */
1409 if (bufpos
>= bufmax
)
1411 bufmax
= 2 * bufmax
+ 10;
1412 buffer
= xrealloc (buffer
, bufmax
);
1414 buffer
[bufpos
++] = c
;
1418 if (bufpos
>= bufmax
)
1420 bufmax
= 2 * bufmax
+ 10;
1421 buffer
= xrealloc (buffer
, bufmax
);
1423 buffer
[bufpos
++] = '\0';
1425 tp
->string
= xstrdup (buffer
);
1428 fprintf (stderr
, "%s:%d: complete variable name: %s\n",
1429 real_file_name
, line_number
, tp
->string
);
1432 prefer_division_over_regexp
= true;
1435 * 3) If the following looks strange to you, this is valid Perl syntax:
1437 * $var = $$hashref # We can place a
1438 * # comment here and then ...
1439 * {key_into_hashref};
1441 * POD sections are not allowed but we leave complaints about
1442 * that to the compiler/interpreter.
1444 /* We only extract strings from the first hash key (if present). */
1446 if (maybe_hash_deref
|| maybe_hash_value
)
1448 bool is_dereference
= false;
1453 while (is_whitespace (c
));
1457 int c2
= phase1_getc ();
1461 is_dereference
= true;
1465 while (is_whitespace (c
));
1467 else if (c2
!= '\n')
1469 /* Discarding the newline is harmless here. The only
1470 special character recognized after a minus is greater-than
1471 for dereference. However, the sequence "-\n>" that we
1472 treat incorrectly here, is a syntax error. */
1477 if (maybe_hash_value
&& is_dereference
)
1480 fprintf (stderr
, "%s:%d: first keys preceded by \"->\"\n",
1481 real_file_name
, line_number
);
1484 else if (maybe_hash_value
)
1486 /* Fake it into a hash. */
1487 tp
->string
[0] = '%';
1490 /* Do NOT change that into else if (see above). */
1491 if ((maybe_hash_value
|| maybe_hash_deref
) && c
== '{')
1493 void *keyword_value
;
1496 fprintf (stderr
, "%s:%d: first keys preceded by '{'\n",
1497 real_file_name
, line_number
);
1500 if (find_entry (&keywords
, tp
->string
, strlen (tp
->string
),
1501 &keyword_value
) == 0)
1503 /* Extract a possible string from the key. Before proceeding
1504 we check whether the open curly is followed by a symbol and
1505 then by a right curly. */
1506 flag_context_list_iterator_ty context_iter
=
1507 flag_context_list_iterator (
1508 flag_context_list_table_lookup (
1509 flag_context_list_table
,
1510 tp
->string
, strlen (tp
->string
)));
1511 token_ty
*t1
= x_perl_lex (mlp
);
1514 fprintf (stderr
, "%s:%d: extracting string key\n",
1515 real_file_name
, line_number
);
1518 if (t1
->type
== token_type_symbol
1519 || t1
->type
== token_type_named_op
)
1521 token_ty
*t2
= x_perl_lex (mlp
);
1522 if (t2
->type
== token_type_rbrace
)
1524 flag_context_ty context
;
1528 inherited_context (null_context
,
1529 flag_context_list_iterator_advance (
1532 pos
.line_number
= line_number
;
1533 pos
.file_name
= logical_file_name
;
1535 xgettext_current_source_encoding
= po_charset_utf8
;
1536 remember_a_message (mlp
, xstrdup (t1
->string
), context
, &pos
);
1537 xgettext_current_source_encoding
= xgettext_global_source_encoding
;
1549 if (extract_balanced (mlp
, 1, token_type_rbrace
,
1550 null_context
, context_iter
, 1, -1))
1565 /* Now consume "->", "[...]", and "{...}". */
1568 int c
= phase2_getc ();
1575 fprintf (stderr
, "%s:%d: extracting balanced '{' after varname\n",
1576 real_file_name
, line_number
);
1578 extract_balanced (mlp
, 0, token_type_rbrace
,
1579 null_context
, null_context_list_iterator
, -1, -1);
1584 fprintf (stderr
, "%s:%d: extracting balanced '[' after varname\n",
1585 real_file_name
, line_number
);
1587 extract_balanced (mlp
, 0, token_type_rbracket
,
1588 null_context
, null_context_list_iterator
, -1, -1);
1592 c2
= phase1_getc ();
1596 fprintf (stderr
, "%s:%d: another \"->\" after varname\n",
1597 real_file_name
, line_number
);
1601 else if (c2
!= '\n')
1603 /* Discarding the newline is harmless here. The only
1604 special character recognized after a minus is greater-than
1605 for dereference. However, the sequence "-\n>" that we
1606 treat incorrectly here, is a syntax error. */
1613 fprintf (stderr
, "%s:%d: variable finished\n",
1614 real_file_name
, line_number
);
1622 /* Actually a simplified version of extract_variable(). It searches for
1623 variables inside a double-quoted string that may interpolate to
1624 some keyword hash (reference). The string is UTF-8 encoded. */
1626 interpolate_keywords (message_list_ty
*mlp
, const char *string
, int lineno
)
1628 static char *buffer
;
1629 static int bufmax
= 0;
1631 flag_context_ty context
;
1633 bool maybe_hash_deref
= false;
1655 * one_dollar: dollar sign seen in state INITIAL
1656 * two_dollars: another dollar-sign has been seen in state ONE_DOLLAR
1657 * identifier: a valid identifier character has been seen in state
1658 * ONE_DOLLAR or TWO_DOLLARS
1659 * minus: a minus-sign has been seen in state IDENTIFIER
1660 * wait_lbrace: a greater-than has been seen in state MINUS
1661 * wait_quote: a left brace has been seen in state IDENTIFIER or in
1663 * dquote: a double-quote has been seen in state WAIT_QUOTE
1664 * squote: a single-quote has been seen in state WAIT_QUOTE
1665 * barekey: an bareword character has been seen in state WAIT_QUOTE
1666 * wait_rbrace: closing quote has been seen in state DQUOTE or SQUOTE
1668 * In the states initial...identifier the context is null_context; in the
1669 * states minus...wait_rbrace the context is the one suitable for the first
1670 * argument of the last seen identifier.
1673 context
= null_context
;
1675 token
.type
= token_type_string
;
1676 token
.sub_type
= string_type_qq
;
1677 token
.line_number
= line_number
;
1678 pos
.file_name
= logical_file_name
;
1679 pos
.line_number
= lineno
;
1681 while ((c
= (unsigned char) *string
++) != '\0')
1683 void *keyword_value
;
1685 if (state
== initial
)
1691 if (bufpos
+ 1 >= bufmax
)
1693 bufmax
= 2 * bufmax
+ 10;
1694 buffer
= xrealloc (buffer
, bufmax
);
1703 c
= (unsigned char) *string
++;
1708 buffer
[bufpos
++] = '$';
1709 maybe_hash_deref
= false;
1721 * This is enough to make us believe later that we dereference
1724 maybe_hash_deref
= true;
1725 state
= two_dollars
;
1728 if (c
== '_' || c
== ':' || c
== '\'' || c
>= 0x80
1729 || (c
>= 'A' && c
<= 'Z')
1730 || (c
>= 'a' && c
<= 'z')
1731 || (c
>= '0' && c
<= '9'))
1733 buffer
[bufpos
++] = c
;
1742 if (c
== '_' || c
== ':' || c
== '\'' || c
>= 0x80
1743 || (c
>= 'A' && c
<= 'Z')
1744 || (c
>= 'a' && c
<= 'z')
1745 || (c
>= '0' && c
<= '9'))
1747 buffer
[bufpos
++] = c
;
1757 if (find_entry (&keywords
, buffer
, bufpos
, &keyword_value
) == 0)
1759 flag_context_list_iterator_ty context_iter
=
1760 flag_context_list_iterator (
1761 flag_context_list_table_lookup (
1762 flag_context_list_table
,
1765 inherited_context (null_context
,
1766 flag_context_list_iterator_advance (
1774 if (!maybe_hash_deref
)
1776 if (find_entry (&keywords
, buffer
, bufpos
, &keyword_value
) == 0)
1778 flag_context_list_iterator_ty context_iter
=
1779 flag_context_list_iterator (
1780 flag_context_list_table_lookup (
1781 flag_context_list_table
,
1784 inherited_context (null_context
,
1785 flag_context_list_iterator_advance (
1793 if (c
== '_' || c
== ':' || c
== '\'' || c
>= 0x80
1794 || (c
>= 'A' && c
<= 'Z')
1795 || (c
>= 'a' && c
<= 'z')
1796 || (c
>= '0' && c
<= '9'))
1798 buffer
[bufpos
++] = c
;
1809 state
= wait_lbrace
;
1812 context
= null_context
;
1824 context
= null_context
;
1835 pos
.line_number
= lineno
;
1840 pos
.line_number
= lineno
;
1845 if (c
== '_' || (c
>= '0' && c
<= '9') || c
>= 0x80
1846 || (c
>= 'A' && c
<= 'Z') || (c
>= 'a' && c
<= 'z'))
1848 pos
.line_number
= lineno
;
1850 buffer
[bufpos
++] = c
;
1855 context
= null_context
;
1865 /* The resulting string has to be interpolated twice. */
1866 buffer
[bufpos
] = '\0';
1867 token
.string
= xstrdup (buffer
);
1868 extract_quotelike_pass3 (&token
, EXIT_FAILURE
);
1869 /* The string can only shrink with interpolation (because
1871 if (!(strlen (token
.string
) <= bufpos
))
1873 strcpy (buffer
, token
.string
);
1874 free (token
.string
);
1875 state
= wait_rbrace
;
1878 if (string
[0] == '\"')
1880 buffer
[bufpos
++] = string
++[0];
1884 buffer
[bufpos
++] = '\\';
1885 buffer
[bufpos
++] = string
++[0];
1889 context
= null_context
;
1894 buffer
[bufpos
++] = c
;
1902 state
= wait_rbrace
;
1905 if (string
[0] == '\'')
1907 buffer
[bufpos
++] = string
++[0];
1911 buffer
[bufpos
++] = '\\';
1912 buffer
[bufpos
++] = string
++[0];
1916 context
= null_context
;
1921 buffer
[bufpos
++] = c
;
1926 if (c
== '_' || (c
>= '0' && c
<= '9') || c
>= 0x80
1927 || (c
>= 'A' && c
<= 'Z') || (c
>= 'a' && c
<= 'z'))
1929 buffer
[bufpos
++] = c
;
1932 else if (is_whitespace (c
))
1934 state
= wait_rbrace
;
1939 context
= null_context
;
1943 /* Must be right brace. */
1951 buffer
[bufpos
] = '\0';
1952 token
.string
= xstrdup (buffer
);
1953 extract_quotelike_pass3 (&token
, EXIT_FAILURE
);
1954 xgettext_current_source_encoding
= po_charset_utf8
;
1955 remember_a_message (mlp
, token
.string
, context
, &pos
);
1956 xgettext_current_source_encoding
= xgettext_global_source_encoding
;
1959 context
= null_context
;
1968 /* The last token seen in the token stream. This is important for the
1969 interpretation of '?' and '/'. */
1970 static token_type_ty last_token
;
1972 /* Combine characters into tokens. Discard whitespace. */
1975 x_perl_prelex (message_list_ty
*mlp
, token_ty
*tp
)
1977 static char *buffer
;
1985 tp
->line_number
= line_number
;
1990 tp
->type
= token_type_eof
;
1994 if (last_non_comment_line
> last_comment_line
)
1995 xgettext_comment_reset ();
1999 /* Ignore whitespace. */
2008 extract_variable (mlp
, tp
, c
);
2009 prefer_division_over_regexp
= true;
2015 last_non_comment_line
= tp
->line_number
;
2021 int c2
= phase1_getc ();
2025 tp
->type
= token_type_other
;
2026 prefer_division_over_regexp
= false;
2029 else if (c2
>= '0' && c2
<= '9')
2031 prefer_division_over_regexp
= false;
2035 tp
->type
= token_type_dot
;
2036 prefer_division_over_regexp
= true;
2041 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2042 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2043 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2044 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2047 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2048 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2049 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2050 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2052 case '0': case '1': case '2': case '3': case '4':
2053 case '5': case '6': case '7': case '8': case '9':
2054 /* Symbol, or part of a number. */
2055 prefer_division_over_regexp
= true;
2059 if (bufpos
>= bufmax
)
2061 bufmax
= 2 * bufmax
+ 10;
2062 buffer
= xrealloc (buffer
, bufmax
);
2064 buffer
[bufpos
++] = c
;
2068 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2069 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2070 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2071 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2074 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2075 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2076 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2077 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2079 case '0': case '1': case '2': case '3': case '4':
2080 case '5': case '6': case '7': case '8': case '9':
2089 if (bufpos
>= bufmax
)
2091 bufmax
= 2 * bufmax
+ 10;
2092 buffer
= xrealloc (buffer
, bufmax
);
2094 buffer
[bufpos
] = '\0';
2096 if (strcmp (buffer
, "__END__") == 0
2097 || strcmp (buffer
, "__DATA__") == 0)
2100 tp
->type
= token_type_eof
;
2103 else if (strcmp (buffer
, "and") == 0
2104 || strcmp (buffer
, "cmp") == 0
2105 || strcmp (buffer
, "eq") == 0
2106 || strcmp (buffer
, "if") == 0
2107 || strcmp (buffer
, "ge") == 0
2108 || strcmp (buffer
, "gt") == 0
2109 || strcmp (buffer
, "le") == 0
2110 || strcmp (buffer
, "lt") == 0
2111 || strcmp (buffer
, "ne") == 0
2112 || strcmp (buffer
, "not") == 0
2113 || strcmp (buffer
, "or") == 0
2114 || strcmp (buffer
, "unless") == 0
2115 || strcmp (buffer
, "while") == 0
2116 || strcmp (buffer
, "xor") == 0)
2118 tp
->type
= token_type_named_op
;
2119 tp
->string
= xstrdup (buffer
);
2120 prefer_division_over_regexp
= false;
2123 else if (strcmp (buffer
, "s") == 0
2124 || strcmp (buffer
, "y") == 0
2125 || strcmp (buffer
, "tr") == 0)
2127 int delim
= phase1_getc ();
2129 while (is_whitespace (delim
))
2130 delim
= phase2_getc ();
2134 tp
->type
= token_type_eof
;
2137 if ((delim
>= '0' && delim
<= '9')
2138 || (delim
>= 'A' && delim
<= 'Z')
2139 || (delim
>= 'a' && delim
<= 'z'))
2141 /* False positive. */
2142 phase2_ungetc (delim
);
2143 tp
->type
= token_type_symbol
;
2144 tp
->sub_type
= symbol_type_none
;
2145 tp
->string
= xstrdup (buffer
);
2146 prefer_division_over_regexp
= true;
2149 extract_triple_quotelike (mlp
, tp
, delim
,
2150 buffer
[0] == 's' && delim
!= '\'');
2152 /* Eat the following modifiers. */
2155 while (c
>= 'a' && c
<= 'z');
2159 else if (strcmp (buffer
, "m") == 0)
2161 int delim
= phase1_getc ();
2163 while (is_whitespace (delim
))
2164 delim
= phase2_getc ();
2168 tp
->type
= token_type_eof
;
2171 if ((delim
>= '0' && delim
<= '9')
2172 || (delim
>= 'A' && delim
<= 'Z')
2173 || (delim
>= 'a' && delim
<= 'z'))
2175 /* False positive. */
2176 phase2_ungetc (delim
);
2177 tp
->type
= token_type_symbol
;
2178 tp
->sub_type
= symbol_type_none
;
2179 tp
->string
= xstrdup (buffer
);
2180 prefer_division_over_regexp
= true;
2183 extract_quotelike (tp
, delim
);
2185 interpolate_keywords (mlp
, tp
->string
, line_number
);
2187 tp
->type
= token_type_regex_op
;
2188 prefer_division_over_regexp
= true;
2190 /* Eat the following modifiers. */
2193 while (c
>= 'a' && c
<= 'z');
2197 else if (strcmp (buffer
, "qq") == 0
2198 || strcmp (buffer
, "q") == 0
2199 || strcmp (buffer
, "qx") == 0
2200 || strcmp (buffer
, "qw") == 0
2201 || strcmp (buffer
, "qr") == 0)
2203 /* The qw (...) construct is not really a string but we
2204 can treat in the same manner and then pretend it is
2205 a symbol. Rationale: Saying "qw (foo bar)" is the
2206 same as "my @list = ('foo', 'bar'); @list;". */
2208 int delim
= phase1_getc ();
2210 while (is_whitespace (delim
))
2211 delim
= phase2_getc ();
2215 tp
->type
= token_type_eof
;
2218 prefer_division_over_regexp
= true;
2220 if ((delim
>= '0' && delim
<= '9')
2221 || (delim
>= 'A' && delim
<= 'Z')
2222 || (delim
>= 'a' && delim
<= 'z'))
2224 /* False positive. */
2225 phase2_ungetc (delim
);
2226 tp
->type
= token_type_symbol
;
2227 tp
->sub_type
= symbol_type_none
;
2228 tp
->string
= xstrdup (buffer
);
2229 prefer_division_over_regexp
= true;
2233 extract_quotelike (tp
, delim
);
2239 tp
->type
= token_type_string
;
2240 tp
->sub_type
= string_type_qq
;
2241 interpolate_keywords (mlp
, tp
->string
, line_number
);
2244 tp
->type
= token_type_regex_op
;
2247 tp
->type
= token_type_symbol
;
2248 tp
->sub_type
= symbol_type_none
;
2251 tp
->type
= token_type_string
;
2252 tp
->sub_type
= string_type_q
;
2259 else if (strcmp (buffer
, "grep") == 0
2260 || strcmp (buffer
, "split") == 0)
2262 prefer_division_over_regexp
= false;
2264 tp
->type
= token_type_symbol
;
2265 tp
->sub_type
= (strcmp (buffer
, "sub") == 0
2267 : symbol_type_none
);
2268 tp
->string
= xstrdup (buffer
);
2272 prefer_division_over_regexp
= true;
2273 extract_quotelike (tp
, c
);
2274 tp
->sub_type
= string_type_qq
;
2275 interpolate_keywords (mlp
, tp
->string
, line_number
);
2279 prefer_division_over_regexp
= true;
2280 extract_quotelike (tp
, c
);
2281 tp
->sub_type
= string_type_qq
;
2282 interpolate_keywords (mlp
, tp
->string
, line_number
);
2286 prefer_division_over_regexp
= true;
2287 extract_quotelike (tp
, c
);
2288 tp
->sub_type
= string_type_q
;
2294 /* Ignore empty list. */
2298 tp
->type
= token_type_lparen
;
2299 prefer_division_over_regexp
= false;
2303 tp
->type
= token_type_rparen
;
2304 prefer_division_over_regexp
= true;
2308 tp
->type
= token_type_lbrace
;
2309 prefer_division_over_regexp
= false;
2313 tp
->type
= token_type_rbrace
;
2314 prefer_division_over_regexp
= false;
2318 tp
->type
= token_type_lbracket
;
2319 prefer_division_over_regexp
= false;
2323 tp
->type
= token_type_rbracket
;
2324 prefer_division_over_regexp
= false;
2328 tp
->type
= token_type_semicolon
;
2329 prefer_division_over_regexp
= false;
2333 tp
->type
= token_type_comma
;
2334 prefer_division_over_regexp
= false;
2338 /* Check for fat comma. */
2342 tp
->type
= token_type_fat_comma
;
2345 else if (linepos
== 2
2346 && (last_token
== token_type_semicolon
2347 || last_token
== token_type_rbrace
)
2348 && ((c
>= 'A' && c
<='Z')
2349 || (c
>= 'a' && c
<= 'z')))
2352 fprintf (stderr
, "%s:%d: start pod section\n",
2353 real_file_name
, line_number
);
2357 fprintf (stderr
, "%s:%d: end pod section\n",
2358 real_file_name
, line_number
);
2363 tp
->type
= token_type_other
;
2364 prefer_division_over_regexp
= false;
2368 /* Check for <<EOF and friends. */
2369 prefer_division_over_regexp
= false;
2377 extract_quotelike (tp
, c
);
2378 string
= get_here_document (tp
->string
);
2380 tp
->string
= string
;
2381 tp
->type
= token_type_string
;
2382 tp
->sub_type
= string_type_verbatim
;
2383 tp
->line_number
= line_number
+ 1;
2389 extract_quotelike (tp
, c
);
2390 string
= get_here_document (tp
->string
);
2392 tp
->string
= string
;
2393 tp
->type
= token_type_string
;
2394 tp
->sub_type
= string_type_qq
;
2395 tp
->line_number
= line_number
+ 1;
2396 interpolate_keywords (mlp
, tp
->string
, line_number
+ 1);
2399 else if ((c
>= 'A' && c
<= 'Z')
2400 || (c
>= 'a' && c
<= 'z')
2404 while ((c
>= 'A' && c
<= 'Z')
2405 || (c
>= 'a' && c
<= 'z')
2406 || (c
>= '0' && c
<= '9')
2407 || c
== '_' || c
>= 0x80)
2409 if (bufpos
>= bufmax
)
2411 bufmax
= 2 * bufmax
+ 10;
2412 buffer
= xrealloc (buffer
, bufmax
);
2414 buffer
[bufpos
++] = c
;
2419 tp
->type
= token_type_eof
;
2426 if (bufpos
>= bufmax
)
2428 bufmax
= 2 * bufmax
+ 10;
2429 buffer
= xrealloc (buffer
, bufmax
);
2431 buffer
[bufpos
++] = '\0';
2432 string
= get_here_document (buffer
);
2433 tp
->string
= string
;
2434 tp
->type
= token_type_string
;
2435 tp
->sub_type
= string_type_qq
;
2436 tp
->line_number
= line_number
+ 1;
2437 interpolate_keywords (mlp
, tp
->string
, line_number
+ 1);
2443 tp
->type
= token_type_other
;
2450 tp
->type
= token_type_other
;
2452 return; /* End of case '>'. */
2455 /* Check for dereferencing operator. */
2459 tp
->type
= token_type_dereference
;
2462 else if ((c
>= 'A' && c
<= 'Z') || (c
>= 'a' && c
<= 'z'))
2464 /* One of the -X (filetest) functions. We play safe
2465 and accept all alphabetical characters here. */
2466 tp
->type
= token_type_other
;
2470 tp
->type
= token_type_other
;
2471 prefer_division_over_regexp
= false;
2476 if (!prefer_division_over_regexp
)
2478 extract_quotelike (tp
, c
);
2479 interpolate_keywords (mlp
, tp
->string
, line_number
);
2481 tp
->type
= token_type_other
;
2482 prefer_division_over_regexp
= true;
2483 /* Eat the following modifiers. */
2486 while (c
>= 'a' && c
<= 'z');
2493 /* We could carefully recognize each of the 2 and 3 character
2494 operators, but it is not necessary, as we only need to recognize
2495 gettext invocations. Don't bother. */
2496 tp
->type
= token_type_other
;
2497 prefer_division_over_regexp
= false;
2504 /* A token stack used as a lookahead buffer. */
2506 typedef struct token_stack_ty token_stack_ty
;
2507 struct token_stack_ty
2514 static struct token_stack_ty token_stack
;
2517 /* Dumps all resources allocated by stack STACK. */
2519 token_stack_dump (token_stack_ty
*stack
)
2523 fprintf (stderr
, "BEGIN STACK DUMP\n");
2524 for (i
= 0; i
< stack
->nitems
; i
++)
2526 token_ty
*token
= stack
->items
[i
];
2527 fprintf (stderr
, " [%s]\n", token2string (token
));
2528 switch (token
->type
)
2530 case token_type_named_op
:
2531 case token_type_string
:
2532 case token_type_symbol
:
2533 case token_type_variable
:
2534 fprintf (stderr
, " string: %s\n", token
->string
);
2538 fprintf (stderr
, "END STACK DUMP\n");
2543 /* Pushes the token TOKEN onto the stack STACK. */
2545 token_stack_push (token_stack_ty
*stack
, token_ty
*token
)
2547 if (stack
->nitems
>= stack
->nitems_max
)
2551 stack
->nitems_max
= 2 * stack
->nitems_max
+ 4;
2552 nbytes
= stack
->nitems_max
* sizeof (token_ty
*);
2553 stack
->items
= xrealloc (stack
->items
, nbytes
);
2555 stack
->items
[stack
->nitems
++] = token
;
2558 /* Pops the most recently pushed token from the stack STACK and returns it.
2559 Returns NULL if the stack is empty. */
2560 static inline token_ty
*
2561 token_stack_pop (token_stack_ty
*stack
)
2563 if (stack
->nitems
> 0)
2564 return stack
->items
[--(stack
->nitems
)];
2569 /* Return the top of the stack without removing it from the stack, or
2570 NULL if the stack is empty. */
2571 static inline token_ty
*
2572 token_stack_peek (const token_stack_ty
*stack
)
2574 if (stack
->nitems
> 0)
2575 return stack
->items
[stack
->nitems
- 1];
2580 /* Frees all resources allocated by stack STACK. */
2582 token_stack_free (token_stack_ty
*stack
)
2586 for (i
= 0; i
< stack
->nitems
; i
++)
2587 free_token (stack
->items
[i
]);
2588 free (stack
->items
);
2593 x_perl_lex (message_list_ty
*mlp
)
2596 int dummy
= token_stack_dump (&token_stack
);
2598 token_ty
*tp
= token_stack_pop (&token_stack
);
2602 tp
= (token_ty
*) xmalloc (sizeof (token_ty
));
2603 x_perl_prelex (mlp
, tp
);
2605 fprintf (stderr
, "%s:%d: x_perl_prelex returned %s\n",
2606 real_file_name
, line_number
, token2string (tp
));
2612 fprintf (stderr
, "%s:%d: %s recycled from stack\n",
2613 real_file_name
, line_number
, token2string (tp
));
2617 /* A symbol followed by a fat comma is really a single-quoted string.
2618 Function definitions or forward declarations also need a special
2619 handling because the dollars and at signs inside the parentheses
2620 must not be interpreted as the beginning of a variable ')'. */
2621 if (tp
->type
== token_type_symbol
|| tp
->type
== token_type_named_op
)
2623 token_ty
*next
= token_stack_peek (&token_stack
);
2628 fprintf (stderr
, "%s:%d: pre-fetching next token\n",
2629 real_file_name
, line_number
);
2631 next
= x_perl_lex (mlp
);
2632 x_perl_unlex (next
);
2634 fprintf (stderr
, "%s:%d: unshifted next token\n",
2635 real_file_name
, line_number
);
2640 fprintf (stderr
, "%s:%d: next token is %s\n",
2641 real_file_name
, line_number
, token2string (next
));
2644 if (next
->type
== token_type_fat_comma
)
2646 tp
->type
= token_type_string
;
2647 tp
->sub_type
= string_type_q
;
2650 "%s:%d: token %s mutated to token_type_string\n",
2651 real_file_name
, line_number
, token2string (tp
));
2654 else if (tp
->type
== token_type_symbol
&& tp
->sub_type
== symbol_type_sub
2655 && next
->type
== token_type_symbol
)
2657 /* Start of a function declaration or definition. Mark this
2658 symbol as a function name, so that we can later eat up
2659 possible prototype information. */
2661 fprintf (stderr
, "%s:%d: subroutine declaration/definition '%s'\n",
2662 real_file_name
, line_number
, next
->string
);
2664 next
->sub_type
= symbol_type_function
;
2666 else if (tp
->type
== token_type_symbol
2667 && (tp
->sub_type
== symbol_type_sub
2668 || tp
->sub_type
== symbol_type_function
)
2669 && next
->type
== token_type_lparen
)
2671 /* For simplicity we simply consume everything up to the
2672 closing parenthesis. Actually only a limited set of
2673 characters is allowed inside parentheses but we leave
2674 complaints to the interpreter and are prepared for
2675 future extensions to the Perl syntax. */
2679 fprintf (stderr
, "%s:%d: consuming prototype information\n",
2680 real_file_name
, line_number
);
2687 fprintf (stderr
, " consuming character '%c'\n", c
);
2690 while (c
!= EOF
&& c
!= ')');
2699 x_perl_unlex (token_ty
*tp
)
2701 token_stack_push (&token_stack
, tp
);
2705 /* ========================= Extracting strings. ========================== */
2707 /* Assuming TP is a string token, this function accumulates all subsequent
2708 . string2 . string3 ... to the string. (String concatenation.) */
2711 collect_message (message_list_ty
*mlp
, token_ty
*tp
, int error_level
)
2716 extract_quotelike_pass3 (tp
, error_level
);
2717 string
= xstrdup (tp
->string
);
2718 len
= strlen (tp
->string
) + 1;
2726 while (is_whitespace (c
));
2736 while (is_whitespace (c
));
2740 if (c
== '"' || c
== '\'' || c
== '`'
2741 || (!prefer_division_over_regexp
&& (c
== '/' || c
== '?'))
2744 token_ty
*qstring
= x_perl_lex (mlp
);
2745 if (qstring
->type
!= token_type_string
)
2747 /* assert (qstring->type == token_type_symbol) */
2748 x_perl_unlex (qstring
);
2752 extract_quotelike_pass3 (qstring
, error_level
);
2753 len
+= strlen (qstring
->string
);
2754 string
= xrealloc (string
, len
);
2755 strcat (string
, qstring
->string
);
2756 free_token (qstring
);
2761 /* The file is broken into tokens. Scan the token stream, looking for
2762 a keyword, followed by a left paren, followed by a string. When we
2763 see this sequence, we have something to remember. We assume we are
2764 looking at a valid C or C++ program, and leave the complaints about
2765 the grammar to the compiler.
2767 Normal handling: Look for
2768 keyword ( ... msgid ... )
2769 Plural handling: Look for
2770 keyword ( ... msgid ... msgid_plural ... )
2772 We use recursion because the arguments before msgid or between msgid
2773 and msgid_plural can contain subexpressions of the same form. */
2775 /* Extract messages until the next balanced closing parenthesis.
2776 Extracted messages are added to MLP.
2778 When specific arguments shall be extracted, ARG_SG and ARG_PL are
2779 set to the corresponding argument number or -1 if not applicable.
2781 Returns true for EOF, false otherwise.
2786 1 - keyword has been seen
2787 2 - extractable string has been seen
2788 3 - a dot operator after an extractable string has been seen
2790 States 2 and 3 are "fragile", the parser will remain in state 2
2791 as long as only opening parentheses are seen, a transition to
2792 state 3 is done on appearance of a dot operator, all other tokens
2793 will cause the parser to fall back to state 1 or 0, eventually
2794 with an error message about invalid intermixing of constant and
2795 non-constant strings.
2797 Likewise, state 3 is fragile. The parser will remain in state 3
2798 as long as only closing parentheses are seen, a transition to state
2799 2 is done on appearance of another (literal!) string, all other
2800 tokens will cause a warning. */
2803 extract_balanced (message_list_ty
*mlp
, int state
, token_type_ty delim
,
2804 flag_context_ty outer_context
,
2805 flag_context_list_iterator_ty context_iter
,
2806 int arg_sg
, int arg_pl
)
2808 /* Remember the message containing the msgid, for msgid_plural. */
2809 message_ty
*plural_mp
= NULL
;
2811 /* The current argument for a possibly extracted keyword. Counting
2815 /* Number of left parentheses seen. */
2818 /* Whether to implicitly assume the next tokens are arguments even without
2820 bool next_is_argument
= false;
2822 /* Context iterator that will be used if the next token is a '('. */
2823 flag_context_list_iterator_ty next_context_iter
=
2824 passthrough_context_list_iterator
;
2825 /* Current context. */
2826 flag_context_ty inner_context
=
2827 inherited_context (outer_context
,
2828 flag_context_list_iterator_advance (&context_iter
));
2831 static int nesting_level
= 0;
2836 last_token
= token_type_semicolon
; /* Safe assumption. */
2837 prefer_division_over_regexp
= false;
2841 int my_last_token
= last_token
;
2842 /* The current token. */
2845 tp
= x_perl_lex (mlp
);
2847 last_token
= tp
->type
;
2849 if (delim
== tp
->type
)
2852 fprintf (stderr
, "%s:%d: extract_balanced finished (%d)\n",
2853 logical_file_name
, tp
->line_number
, --nesting_level
);
2859 if (next_is_argument
&& tp
->type
!= token_type_lparen
)
2861 /* An argument list starts, even though there is no '('. */
2862 context_iter
= next_context_iter
;
2863 outer_context
= inner_context
;
2865 inherited_context (outer_context
,
2866 flag_context_list_iterator_advance (
2872 case token_type_symbol
:
2874 fprintf (stderr
, "%s:%d: type symbol (%d) \"%s\"\n",
2875 logical_file_name
, tp
->line_number
, nesting_level
,
2880 void *keyword_value
;
2882 if (find_entry (&keywords
, tp
->string
, strlen (tp
->string
),
2883 &keyword_value
) == 0)
2885 last_token
= token_type_keyword_symbol
;
2887 arg_sg
= (int) (long) keyword_value
& ((1 << 10) - 1);
2888 arg_pl
= (int) (long) keyword_value
>> 10;
2894 next_is_argument
= true;
2896 flag_context_list_iterator (
2897 flag_context_list_table_lookup (
2898 flag_context_list_table
,
2899 tp
->string
, strlen (tp
->string
)));
2902 case token_type_variable
:
2904 fprintf (stderr
, "%s:%d: type variable (%d) \"%s\"\n",
2905 logical_file_name
, tp
->line_number
, nesting_level
, tp
->string
);
2907 prefer_division_over_regexp
= true;
2908 next_is_argument
= false;
2909 next_context_iter
= null_context_list_iterator
;
2912 case token_type_lparen
:
2914 fprintf (stderr
, "%s:%d: type left parentheses (%d)\n",
2915 logical_file_name
, tp
->line_number
, nesting_level
);
2919 if (extract_balanced (mlp
, state
, token_type_rparen
,
2920 inner_context
, next_context_iter
,
2921 arg_sg
- arg_count
+ 1, arg_pl
- arg_count
+ 1))
2926 if (my_last_token
== token_type_keyword_symbol
)
2927 arg_sg
= arg_pl
= -1;
2928 next_is_argument
= false;
2929 next_context_iter
= null_context_list_iterator
;
2932 case token_type_rparen
:
2934 fprintf (stderr
, "%s:%d: type right parentheses(%d)\n",
2935 logical_file_name
, tp
->line_number
, nesting_level
);
2938 next_is_argument
= false;
2939 next_context_iter
= null_context_list_iterator
;
2942 case token_type_comma
:
2943 case token_type_fat_comma
:
2945 fprintf (stderr
, "%s:%d: type comma (%d)\n",
2946 logical_file_name
, tp
->line_number
, nesting_level
);
2949 if (arg_count
> arg_sg
&& arg_count
> arg_pl
)
2951 /* We have missed the argument. */
2952 arg_sg
= arg_pl
= -1;
2956 fprintf (stderr
, "%s:%d: arg_count: %d, arg_sg: %d, arg_pl: %d\n",
2957 real_file_name
, tp
->line_number
,
2958 arg_count
, arg_sg
, arg_pl
);
2961 inherited_context (outer_context
,
2962 flag_context_list_iterator_advance (
2964 next_is_argument
= false;
2965 next_context_iter
= passthrough_context_list_iterator
;
2968 case token_type_string
:
2970 fprintf (stderr
, "%s:%d: type string (%d): \"%s\"\n",
2971 logical_file_name
, tp
->line_number
, nesting_level
,
2980 pos
.file_name
= logical_file_name
;
2981 pos
.line_number
= tp
->line_number
;
2982 string
= collect_message (mlp
, tp
, EXIT_SUCCESS
);
2983 xgettext_current_source_encoding
= po_charset_utf8
;
2984 remember_a_message (mlp
, string
, inner_context
, &pos
);
2985 xgettext_current_source_encoding
= xgettext_global_source_encoding
;
2992 pos
.file_name
= logical_file_name
;
2993 pos
.line_number
= tp
->line_number
;
2995 if (arg_count
== arg_sg
)
2997 string
= collect_message (mlp
, tp
, EXIT_FAILURE
);
2998 xgettext_current_source_encoding
= po_charset_utf8
;
2999 plural_mp
= remember_a_message (mlp
, string
, inner_context
, &pos
);
3000 xgettext_current_source_encoding
= xgettext_global_source_encoding
;
3003 else if (arg_count
== arg_pl
)
3005 if (plural_mp
== NULL
)
3006 error (EXIT_FAILURE
, 0, _("\
3007 %s:%d: fatal: plural message seen before singular message\n"),
3008 real_file_name
, tp
->line_number
);
3010 string
= collect_message (mlp
, tp
, EXIT_FAILURE
);
3011 xgettext_current_source_encoding
= po_charset_utf8
;
3012 remember_a_message_plural (plural_mp
, string
, inner_context
, &pos
);
3013 xgettext_current_source_encoding
= xgettext_global_source_encoding
;
3018 if (arg_sg
== -1 && arg_pl
== -1)
3024 next_is_argument
= false;
3025 next_context_iter
= null_context_list_iterator
;
3028 case token_type_eof
:
3030 fprintf (stderr
, "%s:%d: type EOF (%d)\n",
3031 logical_file_name
, tp
->line_number
, nesting_level
);
3036 case token_type_lbrace
:
3038 fprintf (stderr
, "%s:%d: type lbrace (%d)\n",
3039 logical_file_name
, tp
->line_number
, nesting_level
);
3041 if (extract_balanced (mlp
, 0, token_type_rbrace
,
3042 null_context
, null_context_list_iterator
,
3048 next_is_argument
= false;
3049 next_context_iter
= null_context_list_iterator
;
3052 case token_type_rbrace
:
3054 fprintf (stderr
, "%s:%d: type rbrace (%d)\n",
3055 logical_file_name
, tp
->line_number
, nesting_level
);
3057 next_is_argument
= false;
3058 next_context_iter
= null_context_list_iterator
;
3062 case token_type_lbracket
:
3064 fprintf (stderr
, "%s:%d: type lbracket (%d)\n",
3065 logical_file_name
, tp
->line_number
, nesting_level
);
3067 if (extract_balanced (mlp
, 0, token_type_rbracket
,
3068 null_context
, null_context_list_iterator
,
3074 next_is_argument
= false;
3075 next_context_iter
= null_context_list_iterator
;
3078 case token_type_rbracket
:
3080 fprintf (stderr
, "%s:%d: type rbracket (%d)\n",
3081 logical_file_name
, tp
->line_number
, nesting_level
);
3083 next_is_argument
= false;
3084 next_context_iter
= null_context_list_iterator
;
3088 case token_type_semicolon
:
3090 fprintf (stderr
, "%s:%d: type semicolon (%d)\n",
3091 logical_file_name
, tp
->line_number
, nesting_level
);
3095 /* The ultimate sign. */
3096 arg_sg
= arg_pl
= -1;
3098 /* FIXME: Instead of resetting outer_context here, it may be better
3099 to recurse in the next_is_argument handling above, waiting for
3100 the next semicolon or other statement terminator. */
3101 outer_context
= null_context
;
3102 context_iter
= null_context_list_iterator
;
3103 next_is_argument
= false;
3104 next_context_iter
= passthrough_context_list_iterator
;
3106 inherited_context (outer_context
,
3107 flag_context_list_iterator_advance (
3111 case token_type_dereference
:
3113 fprintf (stderr
, "%s:%d: type dereference (%d)\n",
3114 logical_file_name
, tp
->line_number
, nesting_level
);
3116 next_is_argument
= false;
3117 next_context_iter
= null_context_list_iterator
;
3120 case token_type_dot
:
3122 fprintf (stderr
, "%s:%d: type dot (%d)\n",
3123 logical_file_name
, tp
->line_number
, nesting_level
);
3125 next_is_argument
= false;
3126 next_context_iter
= null_context_list_iterator
;
3130 case token_type_named_op
:
3132 fprintf (stderr
, "%s:%d: type named operator (%d): %s\n",
3133 logical_file_name
, tp
->line_number
, nesting_level
,
3136 next_is_argument
= false;
3137 next_context_iter
= null_context_list_iterator
;
3141 case token_type_regex_op
:
3143 fprintf (stderr
, "%s:%d: type regex operator (%d)\n",
3144 logical_file_name
, tp
->line_number
, nesting_level
);
3146 next_is_argument
= false;
3147 next_context_iter
= null_context_list_iterator
;
3150 case token_type_other
:
3152 fprintf (stderr
, "%s:%d: type other (%d)\n",
3153 logical_file_name
, tp
->line_number
, nesting_level
);
3155 next_is_argument
= false;
3156 next_context_iter
= null_context_list_iterator
;
3161 fprintf (stderr
, "%s:%d: unknown token type %d\n",
3162 real_file_name
, tp
->line_number
, tp
->type
);
3171 extract_perl (FILE *f
, const char *real_filename
, const char *logical_filename
,
3172 flag_context_list_table_ty
*flag_table
,
3173 msgdomain_list_ty
*mdlp
)
3175 message_list_ty
*mlp
= mdlp
->item
[0]->messages
;
3178 real_file_name
= real_filename
;
3179 logical_file_name
= xstrdup (logical_filename
);
3182 last_comment_line
= -1;
3183 last_non_comment_line
= -1;
3185 flag_context_list_table
= flag_table
;
3189 token_stack
.items
= NULL
;
3190 token_stack
.nitems
= 0;
3191 token_stack
.nitems_max
= 0;
3195 end_of_file
= false;
3197 /* Eat tokens until eof is seen. When extract_balanced returns
3198 due to an unbalanced closing brace, just restart it. */
3199 while (!extract_balanced (mlp
, 0, token_type_rbrace
,
3200 null_context
, null_context_list_iterator
,
3205 real_file_name
= NULL
;
3206 free (logical_file_name
);
3207 logical_file_name
= NULL
;
3209 last_token
= token_type_semicolon
;
3210 token_stack_free (&token_stack
);