Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / x-c.c
blobeaa0a9a4467a0bcb8299139021b2fc9e79688a68
1 /* xgettext C/C++/ObjectiveC backend.
2 Copyright (C) 1995-1998, 2000-2004 Free Software Foundation, Inc.
4 This file was written by Peter Miller <millerp@canb.auug.org.au>
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
24 #include <errno.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
30 #include "message.h"
31 #include "xgettext.h"
32 #include "x-c.h"
33 #include "error.h"
34 #include "error-progname.h"
35 #include "xalloc.h"
36 #include "exit.h"
37 #include "hash.h"
38 #include "gettext.h"
40 #define _(s) gettext(s)
42 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
45 /* The ANSI C standard defines several phases of translation:
47 1. Terminate line by \n, regardless of the external representation
48 of a text line. Stdio does this for us.
50 2. Convert trigraphs to their single character equivalents.
52 3. Concatenate each line ending in backslash (\) with the following
53 line.
55 4. Replace each comment with a space character.
57 5. Parse each resulting logical line as preprocessing tokens and
58 white space.
60 6. Recognize and carry out directives (it also expands macros on
61 non-directive lines, which we do not do here).
63 7. Replaces escape sequences within character strings with their
64 single character equivalents (we do this in step 5, because we
65 don't have to worry about the #include argument).
67 8. Concatenates adjacent string literals to form single string
68 literals (because we don't expand macros, there are a few things
69 we will miss).
71 9. Converts the remaining preprocessing tokens to C tokens and
72 discards any white space from the translation unit.
74 This lexer implements the above, and presents the scanner (in
75 xgettext.c) with a stream of C tokens. The comments are
76 accumulated in a buffer, and given to xgettext when asked for. */
79 /* ========================= Lexer customization. ========================= */
/* Set once trigraph replacement has been requested; off by default.  */
static bool trigraphs = false;

/* Request that the lexer replace trigraph sequences.  */
void
x_c_trigraphs ()
{
  trigraphs = true;
}
90 /* ====================== Keyword set customization. ====================== */
92 /* If true extract all strings. */
93 static bool extract_all = false;
95 static hash_table c_keywords;
96 static hash_table objc_keywords;
97 static bool default_keywords = true;
100 void
101 x_c_extract_all ()
103 extract_all = true;
107 static void
108 add_keyword (const char *name, hash_table *keywords)
110 if (name == NULL)
111 default_keywords = false;
112 else
114 const char *end;
115 int argnum1;
116 int argnum2;
117 const char *colon;
119 if (keywords->table == NULL)
120 init_hash (keywords, 100);
122 split_keywordspec (name, &end, &argnum1, &argnum2);
124 /* The characters between name and end should form a valid C identifier.
125 A colon means an invalid parse in split_keywordspec(). */
126 colon = strchr (name, ':');
127 if (colon == NULL || colon >= end)
129 if (argnum1 == 0)
130 argnum1 = 1;
131 insert_entry (keywords, name, end - name,
132 (void *) (long) (argnum1 + (argnum2 << 10)));
137 void
138 x_c_keyword (const char *name)
140 add_keyword (name, &c_keywords);
143 void
144 x_objc_keyword (const char *name)
146 add_keyword (name, &objc_keywords);
149 /* Finish initializing the keywords hash tables.
150 Called after argument processing, before each file is processed. */
151 static void
152 init_keywords ()
154 if (default_keywords)
156 x_c_keyword ("gettext");
157 x_c_keyword ("dgettext:2");
158 x_c_keyword ("dcgettext:2");
159 x_c_keyword ("ngettext:1,2");
160 x_c_keyword ("dngettext:2,3");
161 x_c_keyword ("dcngettext:2,3");
162 x_c_keyword ("gettext_noop");
164 x_objc_keyword ("gettext");
165 x_objc_keyword ("dgettext:2");
166 x_objc_keyword ("dcgettext:2");
167 x_objc_keyword ("ngettext:1,2");
168 x_objc_keyword ("dngettext:2,3");
169 x_objc_keyword ("dcngettext:2,3");
170 x_objc_keyword ("gettext_noop");
171 x_objc_keyword ("NSLocalizedString"); /* similar to gettext */
172 x_objc_keyword ("_"); /* similar to gettext */
173 x_objc_keyword ("NSLocalizedStaticString"); /* similar to gettext_noop */
174 x_objc_keyword ("__"); /* similar to gettext_noop */
176 default_keywords = false;
/* Record the format-string flags for the C language: the gettext family
   passes c-format through, the <stdio.h> printf family takes a c-format
   argument.  The flags are recorded in the order listed.  */
void
init_flag_table_c ()
{
  static const char *const flag_specs[] =
    {
      "gettext:1:pass-c-format",
      "dgettext:2:pass-c-format",
      "dcgettext:2:pass-c-format",
      "ngettext:1:pass-c-format",
      "ngettext:2:pass-c-format",
      "dngettext:2:pass-c-format",
      "dngettext:3:pass-c-format",
      "dcngettext:2:pass-c-format",
      "dcngettext:3:pass-c-format",
      "gettext_noop:1:pass-c-format",
      /* <stdio.h> */
      "fprintf:2:c-format",
      "vfprintf:2:c-format",
      "printf:1:c-format",
      "vprintf:1:c-format",
      "sprintf:2:c-format",
      "vsprintf:2:c-format",
      "snprintf:3:c-format",
      "vsnprintf:3:c-format",
#if 0 /* These functions are not standard.  */
      /* <stdio.h> */
      "asprintf:2:c-format",
      "vasprintf:2:c-format",
      "dprintf:2:c-format",
      "vdprintf:2:c-format",
      "obstack_printf:2:c-format",
      "obstack_vprintf:2:c-format",
      /* <error.h> */
      "error:3:c-format",
      "error_at_line:5:c-format",
      /* <argp.h> */
      "argp_error:2:c-format",
      "argp_failure:2:c-format",
#endif
    };
  size_t i;

  for (i = 0; i < SIZEOF (flag_specs); i++)
    xgettext_record_flag (flag_specs[i]);
}
/* Record the format-string flags specific to ObjectiveC.  */
void
init_flag_table_objc ()
{
  /* Since the settings done in init_flag_table_c() also have an effect for
     the ObjectiveC parser, we don't have to repeat them here.  */
  static const char *const flag_specs[] =
    {
      "gettext:1:pass-objc-format",
      "dgettext:2:pass-objc-format",
      "dcgettext:2:pass-objc-format",
      "ngettext:1:pass-objc-format",
      "ngettext:2:pass-objc-format",
      "dngettext:2:pass-objc-format",
      "dngettext:3:pass-objc-format",
      "dcngettext:2:pass-objc-format",
      "dcngettext:3:pass-objc-format",
      "gettext_noop:1:pass-objc-format",
      "NSLocalizedString:1:pass-c-format",
      "NSLocalizedString:1:pass-objc-format",
      "_:1:pass-c-format",
      "_:1:pass-objc-format",
      "stringWithFormat::1:objc-format",
      "initWithFormat::1:objc-format",
      "stringByAppendingFormat::1:objc-format",
      "localizedStringWithFormat::1:objc-format",
      "appendFormat::1:objc-format"
    };
  size_t i;

  for (i = 0; i < SIZEOF (flag_specs); i++)
    xgettext_record_flag (flag_specs[i]);
}
/* Record the format-string flags for GCC's internal format: the gettext
   family passes gcc-internal-format through.  The entries under "#if 0"
   are kept for reference only and are not compiled in.  */
void
init_flag_table_gcc_internal ()
{
  static const char *const flag_specs[] =
    {
      "gettext:1:pass-gcc-internal-format",
      "dgettext:2:pass-gcc-internal-format",
      "dcgettext:2:pass-gcc-internal-format",
      "ngettext:1:pass-gcc-internal-format",
      "ngettext:2:pass-gcc-internal-format",
      "dngettext:2:pass-gcc-internal-format",
      "dngettext:3:pass-gcc-internal-format",
      "dcngettext:2:pass-gcc-internal-format",
      "dcngettext:3:pass-gcc-internal-format",
      "gettext_noop:1:pass-gcc-internal-format",
#if 0 /* This should better be done inside GCC.  */
      /* grepping for ATTRIBUTE_PRINTF in gcc-3.3/gcc/?*.h */
      /* c-format.c */
      "status_warning:2:gcc-internal-format",
      /* c-tree.h */
      "pedwarn_c99:1:pass-gcc-internal-format",
      /* collect2.h */
      /* "error:1:c-format",  -- 3 different versions */
      "notice:1:c-format",
      /* "fatal:1:c-format",  -- 2 different versions */
      "fatal_perror:1:c-format",
      /* cpplib.h */
      "cpp_error:3:c-format",
      "cpp_error_with_line:5:c-format",
      /* diagnostic.h */
      "diagnostic_set_info:2:pass-gcc-internal-format",
      "output_printf:2:gcc-internal-format",
      "output_verbatim:2:pass-gcc-internal-format",
      "verbatim:1:gcc-internal-format",
      "inform:1:pass-gcc-internal-format",
      /* gcc.h */
      /* "fatal:1:c-format",  -- 2 different versions */
      /* "error:1:c-format",  -- 3 different versions */
      /* genattrtab.h */
      "attr_printf:2:pass-c-format",
      /* gengtype.h */
      "error_at_line:2:pass-c-format",
      "xvasprintf:2:pass-c-format",
      "xasprintf:1:pass-c-format",
      "oprintf:2:pass-c-format",
      /* gensupport.h */
      "message_with_line:2:pass-c-format",
      /* output.h */
      "output_operand_lossage:1:c-format",
      /* ra.h */
      "ra_debug_msg:2:pass-c-format",
      /* toplev.h */
      "fnotice:2:c-format",
      "fatal_io_error:2:gcc-internal-format",
      "error_for_asm:2:pass-gcc-internal-format",
      "warning_for_asm:2:pass-gcc-internal-format",
      "error_with_file_and_line:3:pass-gcc-internal-format",
      "error_with_decl:2:pass-gcc-internal-format",
      "pedwarn:1:gcc-internal-format",
      "pedwarn_with_file_and_line:3:gcc-internal-format",
      "pedwarn_with_decl:2:gcc-internal-format",
      "sorry:1:gcc-internal-format",
      "error:1:pass-gcc-internal-format",
      "fatal_error:1:pass-gcc-internal-format",
      "internal_error:1:pass-gcc-internal-format",
      "warning:1:pass-gcc-internal-format",
      "warning_with_file_and_line:3:pass-gcc-internal-format",
      "warning_with_decl:2:pass-gcc-internal-format",
      /* f/com.h */
      "ffecom_get_invented_identifier:1:pass-c-format",
      /* f/sts.h */
      "ffests_printf:2:pass-c-format",
      /* java/java-tree.h */
      "parse_error_context:2:pass-c-format",
#endif
    };
  size_t i;

  for (i = 0; i < SIZEOF (flag_specs); i++)
    xgettext_record_flag (flag_specs[i]);
}
321 /* ======================== Reading of characters. ======================== */
/* Real filename, used in error messages about the input file.  */
static const char *real_file_name;

/* Logical filename and line number, used to label the extracted messages.  */
static char *logical_file_name;
static int line_number;

/* The input file stream.  */
static FILE *fp;


/* 0. Terminate line by \n, regardless whether the external representation of
   a line terminator is LF (Unix), CR (Mac) or CR/LF (DOS/Windows).
   It is debatable whether supporting CR/LF line terminators in C sources
   on Unix is ISO C or POSIX compliant, but since GCC 3.3 now supports it
   unconditionally, it must be OK.
   The so-called "text mode" in stdio on DOS/Windows translates CR/LF to \n
   automatically, but here we also need this conversion on Unix.  As a side
   effect, on DOS/Windows we also parse CR/CR/LF into a single \n, but this
   is not a problem.  */

/* Read one character from fp.  Returns EOF at end of input; a read error
   is reported through error() with EXIT_FAILURE.  A CR, optionally
   followed by LF, is returned as a single '\n'.  */
static int
phase0_getc ()
{
  int ch = getc (fp);

  switch (ch)
    {
    case EOF:
      if (ferror (fp))
        error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
               real_file_name);
      return EOF;

    case '\r':
      {
        /* Swallow the LF of a CR/LF pair; anything else goes back.  */
        int following = getc (fp);

        if (following != '\n' && following != EOF)
          ungetc (following, fp);
      }
      /* Seen line terminator CR or CR/LF.  */
      return '\n';

    default:
      return ch;
    }
}
374 /* Supports only one pushback character, and not '\n'. */
375 static inline void
376 phase0_ungetc (int c)
378 if (c != EOF)
379 ungetc (c, fp);
383 /* 1. line_number handling. Combine backslash-newline to nothing. */
385 static unsigned char phase1_pushback[2];
386 static int phase1_pushback_length;
389 static int
390 phase1_getc ()
392 int c;
394 if (phase1_pushback_length)
396 c = phase1_pushback[--phase1_pushback_length];
397 if (c == '\n')
398 ++line_number;
399 return c;
401 for (;;)
403 c = phase0_getc ();
404 switch (c)
406 case '\n':
407 ++line_number;
408 return '\n';
410 case '\\':
411 c = phase0_getc ();
412 if (c != '\n')
414 phase0_ungetc (c);
415 return '\\';
417 ++line_number;
418 break;
420 default:
421 return c;
427 /* Supports 2 characters of pushback. */
428 static void
429 phase1_ungetc (int c)
431 switch (c)
433 case EOF:
434 break;
436 case '\n':
437 --line_number;
438 /* FALLTHROUGH */
440 default:
441 if (phase1_pushback_length == SIZEOF (phase1_pushback))
442 abort ();
443 phase1_pushback[phase1_pushback_length++] = c;
444 break;
449 /* 2. Convert trigraphs to their single character equivalents. Most
450 sane human beings vomit copiously at the mention of trigraphs, which
451 is why they are an option. */
453 static unsigned char phase2_pushback[1];
454 static int phase2_pushback_length;
457 static int
458 phase2_getc ()
460 int c;
462 if (phase2_pushback_length)
463 return phase2_pushback[--phase2_pushback_length];
464 if (!trigraphs)
465 return phase1_getc ();
467 c = phase1_getc ();
468 if (c != '?')
469 return c;
470 c = phase1_getc ();
471 if (c != '?')
473 phase1_ungetc (c);
474 return '?';
476 c = phase1_getc ();
477 switch (c)
479 case '(':
480 return '[';
481 case '/':
482 return '\\';
483 case ')':
484 return ']';
485 case '\'':
486 return '^';
487 case '<':
488 return '{';
489 case '!':
490 return '|';
491 case '>':
492 return '}';
493 case '-':
494 return '~';
495 case '#':
496 return '=';
498 phase1_ungetc (c);
499 phase1_ungetc ('?');
500 return '?';
504 /* Supports only one pushback character. */
505 static void
506 phase2_ungetc (int c)
508 if (c != EOF)
510 if (phase2_pushback_length == SIZEOF (phase2_pushback))
511 abort ();
512 phase2_pushback[phase2_pushback_length++] = c;
/* 3. Concatenate each line ending in backslash (\) with the following
   line.  Basically, all you need to do is elide "\\\n" sequences from
   the input.  */

static unsigned char phase3_pushback[2];
static int phase3_pushback_length;


/* Return the next character, serving pushed-back characters first and
   skipping every backslash-newline pair.  */
static int
phase3_getc ()
{
  int c;

  if (phase3_pushback_length)
    return phase3_pushback[--phase3_pushback_length];

  do
    {
      c = phase2_getc ();
      if (c != '\\')
        return c;
      /* Look at what follows the backslash.  */
      c = phase2_getc ();
    }
  while (c == '\n');

  /* A backslash not followed by newline is delivered as is.  */
  phase2_ungetc (c);
  return '\\';
}
545 /* Supports 2 characters of pushback. */
546 static void
547 phase3_ungetc (int c)
549 if (c != EOF)
551 if (phase3_pushback_length == SIZEOF (phase3_pushback))
552 abort ();
553 phase3_pushback[phase3_pushback_length++] = c;
/* Accumulating comments.  */

static char *buffer;    /* text of the comment line being collected */
static size_t bufmax;   /* allocated size of buffer */
static size_t buflen;   /* number of characters currently in buffer */

/* Begin collecting a new comment line: forget what is in the buffer.  */
static inline void
comment_start ()
{
  buflen = 0;
}
570 static inline void
571 comment_add (int c)
573 if (buflen >= bufmax)
575 bufmax = 2 * bufmax + 10;
576 buffer = xrealloc (buffer, bufmax);
578 buffer[buflen++] = c;
581 static inline void
582 comment_line_end (size_t chars_to_remove)
584 buflen -= chars_to_remove;
585 while (buflen >= 1
586 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
587 --buflen;
588 if (chars_to_remove == 0 && buflen >= bufmax)
590 bufmax = 2 * bufmax + 10;
591 buffer = xrealloc (buffer, bufmax);
593 buffer[buflen] = '\0';
594 savable_comment_add (buffer);
598 /* These are for tracking whether comments count as immediately before
599 keyword. */
600 static int last_comment_line;
601 static int last_non_comment_line;
602 static int newline_count;
605 /* 4. Replace each comment that is not inside a character constant or
606 string literal with a space character. We need to remember the
607 comment for later, because it may be attached to a keyword string.
608 We also optionally understand C++ comments. */
610 static int
611 phase4_getc ()
613 int c;
614 bool last_was_star;
616 c = phase3_getc ();
617 if (c != '/')
618 return c;
619 c = phase3_getc ();
620 switch (c)
622 default:
623 phase3_ungetc (c);
624 return '/';
626 case '*':
627 /* C comment. */
628 comment_start ();
629 last_was_star = false;
630 for (;;)
632 c = phase3_getc ();
633 if (c == EOF)
634 break;
635 /* We skip all leading white space, but not EOLs. */
636 if (!(buflen == 0 && (c == ' ' || c == '\t')))
637 comment_add (c);
638 switch (c)
640 case '\n':
641 comment_line_end (1);
642 comment_start ();
643 last_was_star = false;
644 continue;
646 case '*':
647 last_was_star = true;
648 continue;
650 case '/':
651 if (last_was_star)
653 comment_line_end (2);
654 break;
656 /* FALLTHROUGH */
658 default:
659 last_was_star = false;
660 continue;
662 break;
664 last_comment_line = newline_count;
665 return ' ';
667 case '/':
668 /* C++ or ISO C 99 comment. */
669 comment_start ();
670 for (;;)
672 c = phase3_getc ();
673 if (c == '\n' || c == EOF)
674 break;
675 /* We skip all leading white space, but not EOLs. */
676 if (!(buflen == 0 && (c == ' ' || c == '\t')))
677 comment_add (c);
679 comment_line_end (0);
680 last_comment_line = newline_count;
681 return '\n';
/* Push C back for phase4_getc() by handing it to phase 3.
   Supports only one pushback character.  */
static void
phase4_ungetc (int c)
{
  phase3_ungetc (c);
}
694 /* ========================== Reading of tokens. ========================== */
697 /* True if ObjectiveC extensions are recognized. */
698 static bool objc_extensions;
700 enum token_type_ty
702 token_type_character_constant, /* 'x' */
703 token_type_eof,
704 token_type_eoln,
705 token_type_hash, /* # */
706 token_type_lparen, /* ( */
707 token_type_rparen, /* ) */
708 token_type_comma, /* , */
709 token_type_colon, /* : */
710 token_type_name, /* abc */
711 token_type_number, /* 2.7 */
712 token_type_string_literal, /* "abc" */
713 token_type_symbol, /* < > = etc. */
714 token_type_objc_special, /* @ */
715 token_type_white_space
717 typedef enum token_type_ty token_type_ty;
719 typedef struct token_ty token_ty;
720 struct token_ty
722 token_type_ty type;
723 char *string; /* for token_type_name, token_type_string_literal */
724 refcounted_string_list_ty *comment; /* for token_type_string_literal,
725 token_type_objc_special */
726 long number;
727 int line_number;
/* 7. Replace escape sequences within character strings with their
   single character equivalents.  This is called from phase 5, because
   we don't have to worry about the #include argument.  There are
   pathological cases which could bite us (like the DOS directory
   separator), but just pretend it can't happen.  */

#define P7_QUOTES (1000 + '"')
#define P7_QUOTE (1000 + '\'')
#define P7_NEWLINE (1000 + '\n')

/* Return the value of the hexadecimal digit C, or -1 if C is not one.  */
static inline int
phase7_hex_value (int c)
{
  switch (c)
    {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return c - '0';

    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
      return 10 + c - 'A';

    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
      return 10 + c - 'a';

    default:
      return -1;
    }
}

/* Return the next character of a string literal or character constant
   with escape sequences decoded.  An unescaped newline, double quote or
   single quote is returned as P7_NEWLINE, P7_QUOTES or P7_QUOTE, so the
   caller can tell them apart from the same characters written as escape
   sequences.  An unrecognized escape yields the backslash itself, with
   the following character left for the next call.  */
static int
phase7_getc ()
{
  int c, n, j;

  /* Use phase 3, because phase 4 elides comments.  */
  c = phase3_getc ();

  /* Return a magic newline indicator, so that we can distinguish
     between the user requesting a newline in the string (e.g. using
     "\n" or "\012") from the user failing to terminate the string or
     character constant.  The ANSI C standard says: 3.1.3.4 Character
     Constants contain ``any character except single quote, backslash or
     newline; or an escape sequence'' and 3.1.4 String Literals contain
     ``any character except double quote, backslash or newline; or an
     escape sequence''.

     Most compilers give a fatal error in this case, however gcc is
     stupidly silent, even though this is a very common typo.  OK, so
     gcc --pedantic will tell me, but that gripes about too much other
     stuff.  Could I have a ``gcc -Wnewline-in-string'' option, or
     better yet a ``gcc -fno-newline-in-string'' option, please?  Gcc is
     also inconsistent between string literals and character constants:
     you may not embed newlines in character constants; try it, you get
     a useful diagnostic.  --PMiller  */
  if (c == '\n')
    return P7_NEWLINE;

  if (c == '"')
    return P7_QUOTES;
  if (c == '\'')
    return P7_QUOTE;
  if (c != '\\')
    return c;
  c = phase3_getc ();
  switch (c)
    {
    default:
      /* Unknown escape sequences really should be an error, but just
         ignore them, and let the real compiler complain.  */
      phase3_ungetc (c);
      return '\\';

    case '"':
    case '\'':
    case '?':
    case '\\':
      return c;

    case 'a':
      return '\a';
    case 'b':
      return '\b';

      /* The \e escape is peculiar to gcc, and assumes an ASCII
         character set (or superset).  We don't provide support for it
         here.  */

    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case 'v':
      return '\v';

    case 'x':
      c = phase3_getc ();
      if (phase7_hex_value (c) < 0)
        {
          /* "\x" without any digit: treat like an unknown escape.  */
          phase3_ungetc (c);
          phase3_ungetc ('x');
          return '\\';
        }
      n = 0;
      do
        {
          /* Keep only the low 8 bits.  The caller stores the result in a
             char buffer anyway, and an unbounded accumulator would
             overflow on long digit sequences or produce a value equal to
             one of the P7_* indicators.  */
          n = (n * 16 + phase7_hex_value (c)) & 0xff;
          c = phase3_getc ();
        }
      while (phase7_hex_value (c) >= 0);
      phase3_ungetc (c);
      return n;

    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
      /* At most three octal digits.  */
      n = 0;
      for (j = 0; j < 3; ++j)
        {
          n = n * 8 + c - '0';
          c = phase3_getc ();
          if (c < '0' || c > '7')
            break;
        }
      phase3_ungetc (c);
      return n;
    }
}
/* Push C back for phase7_getc() by handing it to phase 3.  */
static void
phase7_ungetc (int c)
{
  phase3_ungetc (c);
}
882 /* Free the memory pointed to by a 'struct token_ty'. */
883 static inline void
884 free_token (token_ty *tp)
886 if (tp->type == token_type_name || tp->type == token_type_string_literal)
887 free (tp->string);
888 if (tp->type == token_type_string_literal
889 || tp->type == token_type_objc_special)
890 drop_reference (tp->comment);
894 /* 5. Parse each resulting logical line as preprocessing tokens and
895 white space. Preprocessing tokens and C tokens don't always match. */
897 static token_ty phase5_pushback[1];
898 static int phase5_pushback_length;
901 static void
902 phase5_get (token_ty *tp)
904 static char *buffer;
905 static int bufmax;
906 int bufpos;
907 int c;
909 if (phase5_pushback_length)
911 *tp = phase5_pushback[--phase5_pushback_length];
912 return;
914 tp->string = NULL;
915 tp->number = 0;
916 tp->line_number = line_number;
917 c = phase4_getc ();
918 switch (c)
920 case EOF:
921 tp->type = token_type_eof;
922 return;
924 case '\n':
925 tp->type = token_type_eoln;
926 return;
928 case ' ':
929 case '\f':
930 case '\t':
931 for (;;)
933 c = phase4_getc ();
934 switch (c)
936 case ' ':
937 case '\f':
938 case '\t':
939 continue;
941 default:
942 phase4_ungetc (c);
943 break;
945 break;
947 tp->type = token_type_white_space;
948 return;
950 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
951 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
952 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
953 case 'V': case 'W': case 'X': case 'Y': case 'Z':
954 case '_':
955 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
956 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
957 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
958 case 'v': case 'w': case 'x': case 'y': case 'z':
959 bufpos = 0;
960 for (;;)
962 if (bufpos >= bufmax)
964 bufmax = 2 * bufmax + 10;
965 buffer = xrealloc (buffer, bufmax);
967 buffer[bufpos++] = c;
968 c = phase4_getc ();
969 switch (c)
971 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
972 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
973 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
974 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
975 case 'Y': case 'Z':
976 case '_':
977 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
978 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
979 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
980 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
981 case 'y': case 'z':
982 case '0': case '1': case '2': case '3': case '4':
983 case '5': case '6': case '7': case '8': case '9':
984 continue;
986 default:
987 phase4_ungetc (c);
988 break;
990 break;
992 if (bufpos >= bufmax)
994 bufmax = 2 * bufmax + 10;
995 buffer = xrealloc (buffer, bufmax);
997 buffer[bufpos] = 0;
998 tp->string = xstrdup (buffer);
999 tp->type = token_type_name;
1000 return;
1002 case '.':
1003 c = phase4_getc ();
1004 phase4_ungetc (c);
1005 switch (c)
1007 default:
1008 tp->type = token_type_symbol;
1009 return;
1011 case '0': case '1': case '2': case '3': case '4':
1012 case '5': case '6': case '7': case '8': case '9':
1013 c = '.';
1014 break;
1016 /* FALLTHROUGH */
1018 case '0': case '1': case '2': case '3': case '4':
1019 case '5': case '6': case '7': case '8': case '9':
1020 /* The preprocessing number token is more "generous" than the C
1021 number tokens. This is mostly due to token pasting (another
1022 thing we can ignore here). */
1023 bufpos = 0;
1024 for (;;)
1026 if (bufpos >= bufmax)
1028 bufmax = 2 * bufmax + 10;
1029 buffer = xrealloc (buffer, bufmax);
1031 buffer[bufpos++] = c;
1032 c = phase4_getc ();
1033 switch (c)
1035 case 'e':
1036 case 'E':
1037 if (bufpos >= bufmax)
1039 bufmax = 2 * bufmax + 10;
1040 buffer = xrealloc (buffer, bufmax);
1042 buffer[bufpos++] = c;
1043 c = phase4_getc ();
1044 if (c != '+' || c != '-')
1046 phase4_ungetc (c);
1047 break;
1049 continue;
1051 case 'A': case 'B': case 'C': case 'D': case 'F':
1052 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1053 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1054 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1055 case 'Y': case 'Z':
1056 case 'a': case 'b': case 'c': case 'd': case 'f':
1057 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1058 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1059 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1060 case 'y': case 'z':
1061 case '0': case '1': case '2': case '3': case '4':
1062 case '5': case '6': case '7': case '8': case '9':
1063 case '.':
1064 continue;
1066 default:
1067 phase4_ungetc (c);
1068 break;
1070 break;
1072 if (bufpos >= bufmax)
1074 bufmax = 2 * bufmax + 10;
1075 buffer = xrealloc (buffer, bufmax);
1077 buffer[bufpos] = 0;
1078 tp->type = token_type_number;
1079 tp->number = atol (buffer);
1080 return;
1082 case '\'':
1083 /* We could worry about the 'L' before wide character constants,
1084 but ignoring it has no effect unless one of the keywords is
1085 "L". Just pretend it won't happen. Also, we don't need to
1086 remember the character constant. */
1087 for (;;)
1089 c = phase7_getc ();
1090 if (c == P7_NEWLINE)
1092 error_with_progname = false;
1093 error (0, 0, _("%s:%d: warning: unterminated character constant"),
1094 logical_file_name, line_number - 1);
1095 error_with_progname = true;
1096 phase7_ungetc ('\n');
1097 break;
1099 if (c == EOF || c == P7_QUOTE)
1100 break;
1102 tp->type = token_type_character_constant;
1103 return;
1105 case '"':
1106 /* We could worry about the 'L' before wide string constants,
1107 but since gettext's argument is not a wide character string,
1108 let the compiler complain about the argument not matching the
1109 prototype. Just pretend it won't happen. */
1110 bufpos = 0;
1111 for (;;)
1113 c = phase7_getc ();
1114 if (c == P7_NEWLINE)
1116 error_with_progname = false;
1117 error (0, 0, _("%s:%d: warning: unterminated string literal"),
1118 logical_file_name, line_number - 1);
1119 error_with_progname = true;
1120 phase7_ungetc ('\n');
1121 break;
1123 if (c == EOF || c == P7_QUOTES)
1124 break;
1125 if (c == P7_QUOTE)
1126 c = '\'';
1127 if (bufpos >= bufmax)
1129 bufmax = 2 * bufmax + 10;
1130 buffer = xrealloc (buffer, bufmax);
1132 buffer[bufpos++] = c;
1134 if (bufpos >= bufmax)
1136 bufmax = 2 * bufmax + 10;
1137 buffer = xrealloc (buffer, bufmax);
1139 buffer[bufpos] = 0;
1140 tp->type = token_type_string_literal;
1141 tp->string = xstrdup (buffer);
1142 tp->comment = add_reference (savable_comment);
1143 return;
1145 case '(':
1146 tp->type = token_type_lparen;
1147 return;
1149 case ')':
1150 tp->type = token_type_rparen;
1151 return;
1153 case ',':
1154 tp->type = token_type_comma;
1155 return;
1157 case '#':
1158 tp->type = token_type_hash;
1159 return;
1161 case ':':
1162 tp->type = token_type_colon;
1163 return;
1165 case '@':
1166 if (objc_extensions)
1168 tp->type = token_type_objc_special;
1169 tp->comment = add_reference (savable_comment);
1170 return;
1172 /* FALLTHROUGH */
1174 default:
1175 /* We could carefully recognize each of the 2 and 3 character
1176 operators, but it is not necessary, as we only need to recognize
1177 gettext invocations. Don't bother. */
1178 tp->type = token_type_symbol;
1179 return;
1184 /* Supports only one pushback token. */
1185 static void
1186 phase5_unget (token_ty *tp)
1188 if (tp->type != token_type_eof)
1190 if (phase5_pushback_length == SIZEOF (phase5_pushback))
1191 abort ();
1192 phase5_pushback[phase5_pushback_length++] = *tp;
1197 /* X. Recognize a leading # symbol. Leave leading hash as a hash, but
1198 turn hash in the middle of a line into a plain symbol token. This
1199 makes the phase 6 easier. */
1201 static void
1202 phaseX_get (token_ty *tp)
1204 static bool middle; /* false at the beginning of a line, true otherwise. */
1206 phase5_get (tp);
1208 if (tp->type == token_type_eoln || tp->type == token_type_eof)
1209 middle = false;
1210 else
1212 if (middle)
1214 /* Turn hash in the middle of a line into a plain symbol token. */
1215 if (tp->type == token_type_hash)
1216 tp->type = token_type_symbol;
1218 else
1220 /* When we see leading whitespace followed by a hash sign,
1221 discard the leading white space token. The hash is all
1222 phase 6 is interested in. */
1223 if (tp->type == token_type_white_space)
1225 token_ty next;
1227 phase5_get (&next);
1228 if (next.type == token_type_hash)
1229 *tp = next;
1230 else
1231 phase5_unget (&next);
1233 middle = true;
/* 6. Recognize and carry out directives (it also expands macros on
   non-directive lines, which we do not do here).  The only directives
   we care about are the #line and #define directives.  We throw all
   the others away.  */
1244 static token_ty phase6_pushback[2];
1245 static int phase6_pushback_length;
1248 static void
1249 phase6_get (token_ty *tp)
1251 static token_ty *buf;
1252 static int bufmax;
1253 int bufpos;
1254 int j;
1256 if (phase6_pushback_length)
1258 *tp = phase6_pushback[--phase6_pushback_length];
1259 return;
1261 for (;;)
1263 /* Get the next token. If it is not a '#' at the beginning of a
1264 line (ignoring whitespace), return immediately. */
1265 phaseX_get (tp);
1266 if (tp->type != token_type_hash)
1267 return;
1269 /* Accumulate the rest of the directive in a buffer, until the
1270 "define" keyword is seen or until end of line. */
1271 bufpos = 0;
1272 for (;;)
1274 phaseX_get (tp);
1275 if (tp->type == token_type_eoln || tp->type == token_type_eof)
1276 break;
1278 /* Before the "define" keyword and inside other directives
1279 white space is irrelevant. So just throw it away. */
1280 if (tp->type != token_type_white_space)
1282 /* If it is a #define directive, return immediately,
1283 thus treating the body of the #define directive like
1284 normal input. */
1285 if (bufpos == 0
1286 && tp->type == token_type_name
1287 && strcmp (tp->string, "define") == 0)
1288 return;
1290 /* Accumulate. */
1291 if (bufpos >= bufmax)
1293 bufmax = 2 * bufmax + 10;
1294 buf = xrealloc (buf, bufmax * sizeof (buf[0]));
1296 buf[bufpos++] = *tp;
1300 /* If it is a #line directive, with no macros to expand, act on
1301 it. Ignore all other directives. */
1302 if (bufpos >= 3 && buf[0].type == token_type_name
1303 && strcmp (buf[0].string, "line") == 0
1304 && buf[1].type == token_type_number
1305 && buf[2].type == token_type_string_literal)
1307 logical_file_name = xstrdup (buf[2].string);
1308 line_number = buf[1].number;
1310 if (bufpos >= 2 && buf[0].type == token_type_number
1311 && buf[1].type == token_type_string_literal)
1313 logical_file_name = xstrdup (buf[1].string);
1314 line_number = buf[0].number;
1317 /* Release the storage held by the directive. */
1318 for (j = 0; j < bufpos; ++j)
1319 free_token (&buf[j]);
1321 /* We must reset the selected comments. */
1322 savable_comment_reset ();
1327 /* Supports 2 tokens of pushback. */
1328 static void
1329 phase6_unget (token_ty *tp)
1331 if (tp->type != token_type_eof)
1333 if (phase6_pushback_length == SIZEOF (phase6_pushback))
1334 abort ();
1335 phase6_pushback[phase6_pushback_length++] = *tp;
/* 8a. Convert ISO C 99 section 7.8.1 format string directives to string
   literal placeholders.  */

/* Return true if NAME is an ISO C 99 section 7.8.1 format string
   directive macro, i.e. if it matches
     P R I { d | i | o | u | x | X }
     { { | LEAST | FAST } { 8 | 16 | 32 | 64 } | MAX | PTR }
   exactly, with nothing following.  */
static bool
is_inttypes_macro (const char *name)
{
  static const char *const widths[] = { "8", "16", "32", "64" };
  const char *rest;
  size_t i;

  if (strncmp (name, "PRI", 3) != 0)
    return false;
  rest = name + 3;

  /* Conversion specifier letter.  */
  switch (*rest)
    {
    case 'd':
    case 'i':
    case 'o':
    case 'u':
    case 'x':
    case 'X':
      rest++;
      break;
    default:
      return false;
    }

  if (strcmp (rest, "MAX") == 0 || strcmp (rest, "PTR") == 0)
    return true;

  /* Optional LEAST or FAST qualifier, then a mandatory bit width.  */
  if (strncmp (rest, "LEAST", 5) == 0)
    rest += 5;
  else if (strncmp (rest, "FAST", 4) == 0)
    rest += 4;

  for (i = 0; i < sizeof (widths) / sizeof (widths[0]); i++)
    if (strcmp (rest, widths[i]) == 0)
      return true;

  return false;
}
1382 static void
1383 phase8a_get (token_ty *tp)
1385 phase6_get (tp);
1386 if (tp->type == token_type_name && is_inttypes_macro (tp->string))
1388 /* Turn PRIdXXX into "<PRIdXXX>". */
1389 size_t len = strlen (tp->string);
1390 char *new_string = (char *) xmalloc (len + 3);
1391 new_string[0] = '<';
1392 memcpy (new_string + 1, tp->string, len);
1393 new_string[len + 1] = '>';
1394 new_string[len + 2] = '\0';
1395 free (tp->string);
1396 tp->string = new_string;
1397 tp->comment = add_reference (savable_comment);
1398 tp->type = token_type_string_literal;
1402 /* Supports 2 tokens of pushback. */
1403 static inline void
1404 phase8a_unget (token_ty *tp)
1406 phase6_unget (tp);
1410 /* 8b. Drop whitespace. */
1411 static void
1412 phase8b_get (token_ty *tp)
1414 for (;;)
1416 phase8a_get (tp);
1418 if (tp->type == token_type_white_space)
1419 continue;
1420 if (tp->type == token_type_eoln)
1422 /* We have to track the last occurrence of a string. One
1423 mode of xgettext allows to group an extracted message
1424 with a comment for documentation. The rule which states
1425 which comment is assumed to be grouped with the message
1426 says it should immediately precede it. Our
1427 interpretation: between the last line of the comment and
1428 the line in which the keyword is found must be no line
1429 with non-white space tokens. */
1430 ++newline_count;
1431 if (last_non_comment_line > last_comment_line)
1432 savable_comment_reset ();
1433 continue;
1435 break;
1439 /* Supports 2 tokens of pushback. */
1440 static inline void
1441 phase8b_unget (token_ty *tp)
1443 phase8a_unget (tp);
1447 /* 8c. In ObjectiveC mode, drop '@' before a literal string. We need to
1448 do this before performing concatenation of adjacent string literals. */
1449 static void
1450 phase8c_get (token_ty *tp)
1452 token_ty tmp;
1454 phase8b_get (tp);
1455 if (tp->type != token_type_objc_special)
1456 return;
1457 phase8b_get (&tmp);
1458 if (tmp.type != token_type_string_literal)
1460 phase8b_unget (&tmp);
1461 return;
1463 /* Drop the '@' token and return immediately the following string. */
1464 drop_reference (tmp.comment);
1465 tmp.comment = tp->comment;
1466 *tp = tmp;
1469 /* Supports only one pushback token. */
1470 static inline void
1471 phase8c_unget (token_ty *tp)
1473 phase8b_unget (tp);
1477 /* 8. Concatenate adjacent string literals to form single string
1478 literals (because we don't expand macros, there are a few things we
1479 will miss). */
1481 static void
1482 phase8_get (token_ty *tp)
1484 phase8c_get (tp);
1485 if (tp->type != token_type_string_literal)
1486 return;
1487 for (;;)
1489 token_ty tmp;
1490 size_t len;
1492 phase8c_get (&tmp);
1493 if (tmp.type != token_type_string_literal)
1495 phase8c_unget (&tmp);
1496 return;
1498 len = strlen (tp->string);
1499 tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1);
1500 strcpy (tp->string + len, tmp.string);
1501 free (tmp.string);
1506 /* ===================== Reading of high-level tokens. ==================== */
/* Kinds of high-level tokens delivered by x_c_lex.  */
typedef enum xgettext_token_type_ty
{
  xgettext_token_type_eof,
  xgettext_token_type_keyword,
  xgettext_token_type_symbol,
  xgettext_token_type_lparen,
  xgettext_token_type_rparen,
  xgettext_token_type_comma,
  xgettext_token_type_colon,
  xgettext_token_type_string_literal,
  xgettext_token_type_other
} xgettext_token_type_ty;
1523 typedef struct xgettext_token_ty xgettext_token_ty;
1524 struct xgettext_token_ty
1526 xgettext_token_type_ty type;
1528 /* These fields are used only for xgettext_token_type_keyword. */
1529 int argnum1;
1530 int argnum2;
1532 /* This field is used only for xgettext_token_type_string_literal,
1533 xgettext_token_type_keyword, xgettext_token_type_symbol. */
1534 char *string;
1536 /* This field is used only for xgettext_token_type_string_literal. */
1537 refcounted_string_list_ty *comment;
1539 /* These fields are only for
1540 xgettext_token_type_keyword,
1541 xgettext_token_type_string_literal. */
1542 lex_pos_ty pos;
1546 /* 9. Convert the remaining preprocessing tokens to C tokens and
1547 discards any white space from the translation unit. */
1549 static void
1550 x_c_lex (xgettext_token_ty *tp)
1552 for (;;)
1554 token_ty token;
1555 void *keyword_value;
1557 phase8_get (&token);
1558 switch (token.type)
1560 case token_type_eof:
1561 tp->type = xgettext_token_type_eof;
1562 return;
1564 case token_type_name:
1565 last_non_comment_line = newline_count;
1567 if (find_entry (objc_extensions ? &objc_keywords : &c_keywords,
1568 token.string, strlen (token.string), &keyword_value)
1569 == 0)
1571 tp->type = xgettext_token_type_keyword;
1572 tp->argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
1573 tp->argnum2 = (int) (long) keyword_value >> 10;
1574 tp->pos.file_name = logical_file_name;
1575 tp->pos.line_number = token.line_number;
1577 else
1578 tp->type = xgettext_token_type_symbol;
1579 tp->string = token.string;
1580 return;
1582 case token_type_lparen:
1583 last_non_comment_line = newline_count;
1585 tp->type = xgettext_token_type_lparen;
1586 return;
1588 case token_type_rparen:
1589 last_non_comment_line = newline_count;
1591 tp->type = xgettext_token_type_rparen;
1592 return;
1594 case token_type_comma:
1595 last_non_comment_line = newline_count;
1597 tp->type = xgettext_token_type_comma;
1598 return;
1600 case token_type_colon:
1601 last_non_comment_line = newline_count;
1603 tp->type = xgettext_token_type_colon;
1604 return;
1606 case token_type_string_literal:
1607 last_non_comment_line = newline_count;
1609 tp->type = xgettext_token_type_string_literal;
1610 tp->string = token.string;
1611 tp->comment = token.comment;
1612 tp->pos.file_name = logical_file_name;
1613 tp->pos.line_number = token.line_number;
1614 return;
1616 case token_type_objc_special:
1617 drop_reference (token.comment);
1618 /* FALLTHROUGH */
1620 default:
1621 last_non_comment_line = newline_count;
1623 tp->type = xgettext_token_type_other;
1624 return;
1630 /* ========================= Extracting strings. ========================== */
1633 /* Context lookup table. */
1634 static flag_context_list_table_ty *flag_context_list_table;
1637 /* The file is broken into tokens. Scan the token stream, looking for
1638 a keyword, followed by a left paren, followed by a string. When we
1639 see this sequence, we have something to remember. We assume we are
1640 looking at a valid C or C++ program, and leave the complaints about
1641 the grammar to the compiler.
1643 Normal handling: Look for
1644 keyword ( ... msgid ... )
1645 Plural handling: Look for
1646 keyword ( ... msgid ... msgid_plural ... )
1648 We use recursion because the arguments before msgid or between msgid
1649 and msgid_plural can contain subexpressions of the same form. */
1652 /* Extract messages until the next balanced closing parenthesis.
1653 Extracted messages are added to MLP.
1654 When a specific argument shall be extracted, COMMAS_TO_SKIP >= 0 and,
1655 if also a plural argument shall be extracted, PLURAL_COMMAS > 0,
1656 otherwise PLURAL_COMMAS = 0.
1657 When no specific argument shall be extracted, COMMAS_TO_SKIP < 0.
1658 Return true upon eof, false upon closing parenthesis. */
1659 static bool
1660 extract_parenthesized (message_list_ty *mlp,
1661 flag_context_ty outer_context,
1662 flag_context_list_iterator_ty context_iter,
1663 int commas_to_skip, int plural_commas)
1665 /* Remember the message containing the msgid, for msgid_plural. */
1666 message_ty *plural_mp = NULL;
1668 /* 0 when no keyword has been seen. 1 right after a keyword is seen. */
1669 int state;
1670 /* Parameters of the keyword just seen. Defined only in state 1. */
1671 int next_commas_to_skip = -1;
1672 int next_plural_commas = 0;
1673 /* Context iterator that will be used if the next token is a '('. */
1674 flag_context_list_iterator_ty next_context_iter =
1675 passthrough_context_list_iterator;
1676 /* Context iterator that will be used if the next token is a ':'.
1677 (Objective C selector syntax.) */
1678 flag_context_list_iterator_ty selectorcall_context_iter =
1679 passthrough_context_list_iterator;
1680 /* Current context. */
1681 flag_context_ty inner_context =
1682 inherited_context (outer_context,
1683 flag_context_list_iterator_advance (&context_iter));
1685 /* Start state is 0. */
1686 state = 0;
1688 for (;;)
1690 xgettext_token_ty token;
1692 x_c_lex (&token);
1693 switch (token.type)
1695 case xgettext_token_type_keyword:
1696 next_commas_to_skip = token.argnum1 - 1;
1697 next_plural_commas = (token.argnum2 > token.argnum1
1698 ? token.argnum2 - token.argnum1 : 0);
1699 state = 1;
1700 goto keyword_or_symbol;
1702 case xgettext_token_type_symbol:
1703 state = 0;
1704 keyword_or_symbol:
1705 next_context_iter =
1706 flag_context_list_iterator (
1707 flag_context_list_table_lookup (
1708 flag_context_list_table,
1709 token.string, strlen (token.string)));
1710 if (objc_extensions)
1712 size_t token_string_len = strlen (token.string);
1713 token.string = xrealloc (token.string, token_string_len + 2);
1714 token.string[token_string_len] = ':';
1715 token.string[token_string_len + 1] = '\0';
1716 selectorcall_context_iter =
1717 flag_context_list_iterator (
1718 flag_context_list_table_lookup (
1719 flag_context_list_table,
1720 token.string, token_string_len + 1));
1722 free (token.string);
1723 continue;
1725 case xgettext_token_type_lparen:
1726 if (extract_parenthesized (mlp, inner_context, next_context_iter,
1727 state ? next_commas_to_skip : -1,
1728 state ? next_plural_commas : 0))
1729 return true;
1730 next_context_iter = null_context_list_iterator;
1731 selectorcall_context_iter = null_context_list_iterator;
1732 state = 0;
1733 continue;
1735 case xgettext_token_type_rparen:
1736 return false;
1738 case xgettext_token_type_comma:
1739 if (commas_to_skip >= 0)
1741 if (commas_to_skip > 0)
1742 commas_to_skip--;
1743 else
1744 if (plural_mp != NULL && plural_commas > 0)
1746 commas_to_skip = plural_commas - 1;
1747 plural_commas = 0;
1749 else
1750 commas_to_skip = -1;
1752 inner_context =
1753 inherited_context (outer_context,
1754 flag_context_list_iterator_advance (
1755 &context_iter));
1756 next_context_iter = passthrough_context_list_iterator;
1757 selectorcall_context_iter = passthrough_context_list_iterator;
1758 state = 0;
1759 continue;
1761 case xgettext_token_type_colon:
1762 if (objc_extensions)
1764 context_iter = selectorcall_context_iter;
1765 inner_context =
1766 inherited_context (inner_context,
1767 flag_context_list_iterator_advance (
1768 &context_iter));
1769 next_context_iter = passthrough_context_list_iterator;
1770 selectorcall_context_iter = passthrough_context_list_iterator;
1772 else
1774 next_context_iter = null_context_list_iterator;
1775 selectorcall_context_iter = null_context_list_iterator;
1777 state = 0;
1778 continue;
1780 case xgettext_token_type_string_literal:
1781 if (extract_all)
1783 savable_comment_to_xgettext_comment (token.comment);
1784 remember_a_message (mlp, token.string, inner_context, &token.pos);
1785 savable_comment_reset ();
1787 else
1789 if (commas_to_skip == 0)
1791 if (plural_mp == NULL)
1793 /* Seen an msgid. */
1794 message_ty *mp;
1796 savable_comment_to_xgettext_comment (token.comment);
1797 mp = remember_a_message (mlp, token.string,
1798 inner_context, &token.pos);
1799 savable_comment_reset ();
1800 if (plural_commas > 0)
1801 plural_mp = mp;
1803 else
1805 /* Seen an msgid_plural. */
1806 remember_a_message_plural (plural_mp, token.string,
1807 inner_context, &token.pos);
1808 plural_mp = NULL;
1811 else
1812 free (token.string);
1814 drop_reference (token.comment);
1815 next_context_iter = null_context_list_iterator;
1816 selectorcall_context_iter = null_context_list_iterator;
1817 state = 0;
1818 continue;
1820 case xgettext_token_type_other:
1821 next_context_iter = null_context_list_iterator;
1822 selectorcall_context_iter = null_context_list_iterator;
1823 state = 0;
1824 continue;
1826 case xgettext_token_type_eof:
1827 return true;
1829 default:
1830 abort ();
1836 static void
1837 extract_whole_file (FILE *f,
1838 const char *real_filename, const char *logical_filename,
1839 flag_context_list_table_ty *flag_table,
1840 msgdomain_list_ty *mdlp)
1842 message_list_ty *mlp = mdlp->item[0]->messages;
1844 fp = f;
1845 real_file_name = real_filename;
1846 logical_file_name = xstrdup (logical_filename);
1847 line_number = 1;
1849 newline_count = 0;
1850 last_comment_line = -1;
1851 last_non_comment_line = -1;
1853 flag_context_list_table = flag_table;
1855 init_keywords ();
1857 /* Eat tokens until eof is seen. When extract_parenthesized returns
1858 due to an unbalanced closing parenthesis, just restart it. */
1859 while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
1860 -1, 0))
1863 /* Close scanner. */
1864 fp = NULL;
1865 real_file_name = NULL;
1866 logical_file_name = NULL;
1867 line_number = 0;
1871 void
1872 extract_c (FILE *f,
1873 const char *real_filename, const char *logical_filename,
1874 flag_context_list_table_ty *flag_table,
1875 msgdomain_list_ty *mdlp)
1877 objc_extensions = false;
1878 extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp);
1881 void
1882 extract_objc (FILE *f,
1883 const char *real_filename, const char *logical_filename,
1884 flag_context_list_table_ty *flag_table,
1885 msgdomain_list_ty *mdlp)
1887 objc_extensions = true;
1888 extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp);