1 /* Extracts strings from C source file to Uniforum style .po file.
2 Copyright (C) 1995-1998, 2000-2005 Free Software Foundation, Inc.
3 Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
38 #include "file-list.h"
41 #include "error-progname.h"
43 #include "relocatable.h"
52 #include "c-strcase.h"
55 #include "read-po-abstract.h"
57 #include "po-charset.h"
58 #include "msgl-iconv.h"
59 #include "msgl-ascii.h"
65 /* A convenience macro. I don't like writing gettext() every time. */
66 #define _(str) gettext (str)
81 #include "x-smalltalk.h"
83 #include "x-properties.h"
90 #include "x-stringtable.h"
/* File-scope state for one xgettext run.  Objects declared below without
   the static keyword (exclude, xgettext_omit_header and the
   xgettext_*_source_* variables) have external linkage.  */
99 /* If true, add all comments immediately preceding one of the keywords. */
100 static bool add_all_comments
= false;
102 /* Tag used in comment of prevailing domain. */
103 static char *comment_tag
;
105 /* Name of default domain file. If not set defaults to messages.po. */
106 static const char *default_domain
;
108 /* If called with --debug option the output reflects whether format
109 string recognition is done automatically or forced by the user. */
/* NOTE(review): the variable described above is not present in this
   listing; do_debug is the flag that the debug entry of long_options
   sets.  */
112 /* Content of .po files with symbols to be excluded. */
113 message_list_ty
*exclude
;
115 /* Force output of PO file even if empty. */
/* NOTE(review): the variable described above is not present in this
   listing; force_po is the flag that the force-po entry of long_options
   sets.  */
118 /* Copyright holder of the output file and the translations. */
119 static const char *copyright_holder
= "THE PACKAGE'S COPYRIGHT HOLDER";
121 /* Email address or URL for reports of bugs in msgids. */
122 static const char *msgid_bugs_address
= NULL
;
124 /* String used as prefix for msgstr. */
125 static const char *msgstr_prefix
;
127 /* String used as suffix for msgstr. */
128 static const char *msgstr_suffix
;
130 /* Directory in which output files are created. */
131 static char *output_dir
;
133 /* The output syntax: .pot or .properties or .strings. */
134 static input_syntax_ty output_syntax
= syntax_po
;
136 /* If nonzero omit header with information about this run. */
137 int xgettext_omit_header
;
139 /* Table of flag_context_list_ty tables. */
140 static flag_context_list_table_ty flag_table_c
;
141 static flag_context_list_table_ty flag_table_objc
;
142 static flag_context_list_table_ty flag_table_gcc_internal
;
143 static flag_context_list_table_ty flag_table_sh
;
144 static flag_context_list_table_ty flag_table_python
;
145 static flag_context_list_table_ty flag_table_lisp
;
146 static flag_context_list_table_ty flag_table_elisp
;
147 static flag_context_list_table_ty flag_table_librep
;
148 static flag_context_list_table_ty flag_table_scheme
;
149 static flag_context_list_table_ty flag_table_java
;
150 static flag_context_list_table_ty flag_table_csharp
;
151 static flag_context_list_table_ty flag_table_awk
;
152 static flag_context_list_table_ty flag_table_ycp
;
153 static flag_context_list_table_ty flag_table_tcl
;
154 static flag_context_list_table_ty flag_table_perl
;
155 static flag_context_list_table_ty flag_table_php
;
157 /* If true, recognize Qt format strings. */
158 static bool recognize_format_qt
;
160 /* Canonicalized encoding name for all input files. */
161 const char *xgettext_global_source_encoding
;
164 /* Converter from xgettext_global_source_encoding to UTF-8 (except from
165 ASCII or UTF-8, when this conversion is a no-op). */
166 iconv_t xgettext_global_source_iconv
;
169 /* Canonicalized encoding name for the current input file. */
170 const char *xgettext_current_source_encoding
;
173 /* Converter from xgettext_current_source_encoding to UTF-8 (except from
174 ASCII or UTF-8, when this conversion is a no-op). */
175 iconv_t xgettext_current_source_iconv
;
/* Long options passed to getopt_long in main.
   Entries whose third member is a variable address (line_comment,
   do_debug, force_po, xgettext_omit_header) make getopt_long store the
   fourth member into that variable.  For entries with NULL there, the
   fourth member is the value getopt_long returns: either a short option
   character or a CHAR_MAX + N code, which main handles in the case with
   the same CHAR_MAX + N label.  */
179 static const struct option long_options
[] =
181 { "add-comments", optional_argument
, NULL
, 'c' },
182 { "add-location", no_argument
, &line_comment
, 1 },
183 { "c++", no_argument
, NULL
, 'C' },
184 { "copyright-holder", required_argument
, NULL
, CHAR_MAX
+ 1 },
185 { "debug", no_argument
, &do_debug
, 1 },
186 { "default-domain", required_argument
, NULL
, 'd' },
187 { "directory", required_argument
, NULL
, 'D' },
188 { "escape", no_argument
, NULL
, 'E' },
189 { "exclude-file", required_argument
, NULL
, 'x' },
190 { "extract-all", no_argument
, NULL
, 'a' },
191 { "files-from", required_argument
, NULL
, 'f' },
192 { "flag", required_argument
, NULL
, CHAR_MAX
+ 8 },
193 { "force-po", no_argument
, &force_po
, 1 },
194 { "foreign-user", no_argument
, NULL
, CHAR_MAX
+ 2 },
195 { "from-code", required_argument
, NULL
, CHAR_MAX
+ 3 },
196 { "help", no_argument
, NULL
, 'h' },
197 { "indent", no_argument
, NULL
, 'i' },
198 { "join-existing", no_argument
, NULL
, 'j' },
199 { "keyword", optional_argument
, NULL
, 'k' },
200 { "language", required_argument
, NULL
, 'L' },
201 { "msgid-bugs-address", required_argument
, NULL
, CHAR_MAX
+ 5 },
202 { "msgstr-prefix", optional_argument
, NULL
, 'm' },
203 { "msgstr-suffix", optional_argument
, NULL
, 'M' },
204 { "no-escape", no_argument
, NULL
, 'e' },
205 { "no-location", no_argument
, &line_comment
, 0 },
206 { "no-wrap", no_argument
, NULL
, CHAR_MAX
+ 4 },
207 { "omit-header", no_argument
, &xgettext_omit_header
, 1 },
208 { "output", required_argument
, NULL
, 'o' },
209 { "output-dir", required_argument
, NULL
, 'p' },
210 { "properties-output", no_argument
, NULL
, CHAR_MAX
+ 6 },
211 { "qt", no_argument
, NULL
, CHAR_MAX
+ 9 },
212 { "sort-by-file", no_argument
, NULL
, 'F' },
213 { "sort-output", no_argument
, NULL
, 's' },
214 { "strict", no_argument
, NULL
, 'S' },
215 { "string-limit", required_argument
, NULL
, 'l' },
216 { "stringtable-output", no_argument
, NULL
, CHAR_MAX
+ 7 },
217 { "trigraphs", no_argument
, NULL
, 'T' },
218 { "version", no_argument
, NULL
, 'V' },
219 { "width", required_argument
, NULL
, 'w', },
224 /* The extractors must all be functions returning void and taking five
225 arguments: three designating the input stream, one flag context list table,
226 and one message domain list argument in which to add the messages. */
227 typedef void (*extractor_func
) (FILE *fp
, const char *real_filename
,
228 const char *logical_filename
,
229 flag_context_list_table_ty
*flag_table
,
230 msgdomain_list_ty
*mdlp
);
232 typedef struct extractor_ty extractor_ty
;
236 flag_context_list_table_ty
*flag_table
;
237 struct formatstring_parser
*formatstring_parser1
;
238 struct formatstring_parser
*formatstring_parser2
;
242 /* Forward declaration of local functions. */
243 static void usage (int status
)
244 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ > 4) || __GNUC__ > 2)
245 __attribute__ ((noreturn
))
248 static void read_exclusion_file (char *file_name
);
249 static void extract_from_file (const char *file_name
, extractor_ty extractor
,
250 msgdomain_list_ty
*mdlp
);
251 static message_ty
*construct_header (void);
252 static void finalize_header (msgdomain_list_ty
*mdlp
);
253 static extractor_ty
language_to_extractor (const char *name
);
254 static const char *extension_to_language (const char *extension
);
258 main (int argc
, char *argv
[])
262 bool do_help
= false;
263 bool do_version
= false;
264 msgdomain_list_ty
*mdlp
;
265 bool join_existing
= false;
266 bool no_default_keywords
= false;
267 bool some_additional_keywords
= false;
268 bool sort_by_msgid
= false;
269 bool sort_by_filepos
= false;
270 const char *file_name
;
271 const char *files_from
= NULL
;
272 string_list_ty
*file_list
;
273 char *output_file
= NULL
;
274 const char *language
= NULL
;
275 extractor_ty extractor
= { NULL
, NULL
, NULL
, NULL
};
277 /* Set program name for messages. */
278 set_program_name (argv
[0]);
279 error_print_progname
= maybe_print_progname
;
281 #ifdef HAVE_SETLOCALE
282 /* Set locale via LC_ALL. */
283 setlocale (LC_ALL
, "");
286 /* Set the text message domain. */
287 bindtextdomain (PACKAGE
, relocate (LOCALEDIR
));
288 textdomain (PACKAGE
);
290 /* Ensure that write errors on stdout are detected. */
291 atexit (close_stdout
);
293 /* Set initial value of variables. */
294 default_domain
= MESSAGE_DOMAIN_DEFAULT
;
295 xgettext_global_source_encoding
= po_charset_ascii
;
296 init_flag_table_c ();
297 init_flag_table_objc ();
298 init_flag_table_gcc_internal ();
299 init_flag_table_sh ();
300 init_flag_table_python ();
301 init_flag_table_lisp ();
302 init_flag_table_elisp ();
303 init_flag_table_librep ();
304 init_flag_table_scheme ();
305 init_flag_table_java ();
306 init_flag_table_csharp ();
307 init_flag_table_awk ();
308 init_flag_table_ycp ();
309 init_flag_table_tcl ();
310 init_flag_table_perl ();
311 init_flag_table_php ();
313 while ((optchar
= getopt_long (argc
, argv
,
314 "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:x:",
315 long_options
, NULL
)) != EOF
)
318 case '\0': /* Long option. */
323 x_python_extract_all ();
324 x_lisp_extract_all ();
325 x_elisp_extract_all ();
326 x_librep_extract_all ();
327 x_scheme_extract_all ();
328 x_java_extract_all ();
329 x_csharp_extract_all ();
330 x_awk_extract_all ();
331 x_tcl_extract_all ();
332 x_perl_extract_all ();
333 x_php_extract_all ();
334 x_glade_extract_all ();
339 add_all_comments
= true;
344 add_all_comments
= false;
345 comment_tag
= optarg
;
346 /* We ignore leading white space. */
347 while (isspace ((unsigned char) *comment_tag
))
355 default_domain
= optarg
;
358 dir_list_append (optarg
);
361 message_print_style_escape (false);
364 message_print_style_escape (true);
370 sort_by_filepos
= true;
376 message_print_style_indent ();
379 join_existing
= true;
382 if (optarg
== NULL
|| *optarg
!= '\0')
384 x_c_keyword (optarg
);
385 x_objc_keyword (optarg
);
386 x_sh_keyword (optarg
);
387 x_python_keyword (optarg
);
388 x_lisp_keyword (optarg
);
389 x_elisp_keyword (optarg
);
390 x_librep_keyword (optarg
);
391 x_scheme_keyword (optarg
);
392 x_java_keyword (optarg
);
393 x_csharp_keyword (optarg
);
394 x_awk_keyword (optarg
);
395 x_tcl_keyword (optarg
);
396 x_perl_keyword (optarg
);
397 x_php_keyword (optarg
);
398 x_glade_keyword (optarg
);
400 no_default_keywords
= true;
402 some_additional_keywords
= true;
406 /* Accepted for backward compatibility with 0.10.35. */
412 /* -m takes an optional argument. If none is given "" is assumed. */
413 msgstr_prefix
= optarg
== NULL
? "" : optarg
;
416 /* -M takes an optional argument. If none is given "" is assumed. */
417 msgstr_suffix
= optarg
== NULL
? "" : optarg
;
423 output_file
= optarg
;
427 size_t len
= strlen (optarg
);
429 if (output_dir
!= NULL
)
432 if (optarg
[len
- 1] == '/')
433 output_dir
= xstrdup (optarg
);
435 output_dir
= xasprintf ("%s/", optarg
);
439 sort_by_msgid
= true;
442 message_print_style_uniforum ();
454 value
= strtol (optarg
, &endp
, 10);
456 message_page_width_set (value
);
460 read_exclusion_file (optarg
);
462 case CHAR_MAX
+ 1: /* --copyright-holder */
463 copyright_holder
= optarg
;
465 case CHAR_MAX
+ 2: /* --foreign-user */
466 copyright_holder
= "";
468 case CHAR_MAX
+ 3: /* --from-code */
469 xgettext_global_source_encoding
= po_charset_canonicalize (optarg
);
470 if (xgettext_global_source_encoding
== NULL
)
471 xgettext_global_source_encoding
= po_charset_ascii
;
473 case CHAR_MAX
+ 4: /* --no-wrap */
474 message_page_width_ignore ();
476 case CHAR_MAX
+ 5: /* --msgid-bugs-address */
477 msgid_bugs_address
= optarg
;
479 case CHAR_MAX
+ 6: /* --properties-output */
480 message_print_syntax_properties ();
481 output_syntax
= syntax_properties
;
483 case CHAR_MAX
+ 7: /* --stringtable-output */
484 message_print_syntax_stringtable ();
485 output_syntax
= syntax_stringtable
;
487 case CHAR_MAX
+ 8: /* --flag */
488 xgettext_record_flag (optarg
);
490 case CHAR_MAX
+ 9: /* --qt */
491 recognize_format_qt
= true;
494 usage (EXIT_FAILURE
);
498 /* Version information requested. */
501 printf ("%s (GNU %s) %s\n", basename (program_name
), PACKAGE
, VERSION
);
502 /* xgettext: no-wrap */
503 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
504 This is free software; see the source for copying conditions. There is NO\n\
505 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
507 "1995-1998, 2000-2005");
508 printf (_("Written by %s.\n"), "Ulrich Drepper");
512 /* Help is requested. */
514 usage (EXIT_SUCCESS
);
516 /* Verify selected options. */
517 if (!line_comment
&& sort_by_filepos
)
518 error (EXIT_FAILURE
, 0, _("%s and %s are mutually exclusive"),
519 "--no-location", "--sort-by-file");
521 if (sort_by_msgid
&& sort_by_filepos
)
522 error (EXIT_FAILURE
, 0, _("%s and %s are mutually exclusive"),
523 "--sort-output", "--sort-by-file");
525 if (join_existing
&& strcmp (default_domain
, "-") == 0)
526 error (EXIT_FAILURE
, 0, _("\
527 --join-existing cannot be used when output is written to stdout"));
529 if (no_default_keywords
&& !some_additional_keywords
)
532 xgettext cannot work without keywords to look for"));
533 usage (EXIT_FAILURE
);
536 /* Test whether we have some input files given. */
537 if (files_from
== NULL
&& optind
>= argc
)
539 error (EXIT_SUCCESS
, 0, _("no input file given"));
540 usage (EXIT_FAILURE
);
543 /* Determine extractor from language. */
544 if (language
!= NULL
)
545 extractor
= language_to_extractor (language
);
547 /* Canonicalize msgstr prefix/suffix. */
548 if (msgstr_prefix
!= NULL
&& msgstr_suffix
== NULL
)
550 else if (msgstr_prefix
== NULL
&& msgstr_suffix
!= NULL
)
553 /* Default output directory is the current directory. */
554 if (output_dir
== NULL
)
557 /* Construct the name of the output file. If the default domain has
558 the special name "-" we write to stdout. */
561 if (IS_ABSOLUTE_PATH (output_file
) || strcmp (output_file
, "-") == 0)
562 file_name
= xstrdup (output_file
);
564 /* Please do NOT add a .po suffix! */
565 file_name
= concatenated_pathname (output_dir
, output_file
, NULL
);
567 else if (strcmp (default_domain
, "-") == 0)
570 file_name
= concatenated_pathname (output_dir
, default_domain
, ".po");
572 /* Determine list of files we have to process. */
573 if (files_from
!= NULL
)
574 file_list
= read_names_from_file (files_from
);
576 file_list
= string_list_alloc ();
577 /* Append names from command line. */
578 for (cnt
= optind
; cnt
< argc
; ++cnt
)
579 string_list_append_unique (file_list
, argv
[cnt
]);
581 /* Allocate converter from xgettext_global_source_encoding to UTF-8 (except
582 from ASCII or UTF-8, when this conversion is a no-op). */
583 if (xgettext_global_source_encoding
!= po_charset_ascii
584 && xgettext_global_source_encoding
!= po_charset_utf8
)
589 /* Avoid glibc-2.1 bug with EUC-KR. */
590 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
591 if (strcmp (xgettext_global_source_encoding
, "EUC-KR") == 0)
595 cd
= iconv_open (po_charset_utf8
, xgettext_global_source_encoding
);
596 if (cd
== (iconv_t
)(-1))
597 error (EXIT_FAILURE
, 0, _("\
598 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
599 and iconv() does not support this conversion."),
600 xgettext_global_source_encoding
, po_charset_utf8
,
601 basename (program_name
));
602 xgettext_global_source_iconv
= cd
;
604 error (EXIT_FAILURE
, 0, _("\
605 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
606 This version was built without iconv()."),
607 xgettext_global_source_encoding
, po_charset_utf8
,
608 basename (program_name
));
612 /* Allocate a message list to remember all the messages. */
613 mdlp
= msgdomain_list_alloc (true);
615 /* Generate a header, so that we know how and when this PO file was created. */
617 if (!xgettext_omit_header
)
618 message_list_append (mdlp
->item
[0]->messages
, construct_header ());
620 /* Read in the old messages, so that we can add to them. */
623 /* Temporarily reset the directory list to empty, because file_name
624 is an output file and therefore should not be searched for. */
625 void *saved_directory_list
= dir_list_save_reset ();
626 extractor_ty po_extractor
= { extract_po
, NULL
, NULL
, NULL
};
628 extract_from_file (file_name
, po_extractor
, mdlp
);
629 if (!is_ascii_msgdomain_list (mdlp
))
630 mdlp
= iconv_msgdomain_list (mdlp
, "UTF-8", file_name
);
632 dir_list_restore (saved_directory_list
);
635 /* Process all input files. */
636 for (cnt
= 0; cnt
< file_list
->nitems
; ++cnt
)
638 const char *filename
;
639 extractor_ty this_file_extractor
;
641 filename
= file_list
->item
[cnt
];
644 this_file_extractor
= extractor
;
649 const char *extension
;
650 const char *language
;
652 base
= strrchr (filename
, '/');
656 reduced
= xstrdup (base
);
657 /* Remove a trailing ".in" - it's a generic suffix. */
658 if (strlen (reduced
) >= 3
659 && memcmp (reduced
+ strlen (reduced
) - 3, ".in", 3) == 0)
660 reduced
[strlen (reduced
) - 3] = '\0';
662 /* Work out what the file extension is. */
663 extension
= strrchr (reduced
, '.');
669 /* Derive the language from the extension, and the extractor
670 function from the language. */
671 language
= extension_to_language (extension
);
672 if (language
== NULL
)
675 warning: file `%s' extension `%s' is unknown; will try C"), filename
, extension
);
678 this_file_extractor
= language_to_extractor (language
);
683 /* Extract the strings from the file. */
684 extract_from_file (filename
, this_file_extractor
, mdlp
);
686 string_list_free (file_list
);
688 /* Finalize the constructed header. */
689 if (!xgettext_omit_header
)
690 finalize_header (mdlp
);
692 /* Free the allocated converter. */
694 if (xgettext_global_source_encoding
!= po_charset_ascii
695 && xgettext_global_source_encoding
!= po_charset_utf8
)
696 iconv_close (xgettext_global_source_iconv
);
699 /* Sorting the list of messages. */
701 msgdomain_list_sort_by_filepos (mdlp
);
702 else if (sort_by_msgid
)
703 msgdomain_list_sort_by_msgid (mdlp
);
705 /* Write the PO file. */
706 msgdomain_list_print (mdlp
, file_name
, force_po
, do_debug
);
712 /* Display usage information and exit. */
716 if (status
!= EXIT_SUCCESS
)
717 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
722 Usage: %s [OPTION] [INPUTFILE]...\n\
726 Extract translatable strings from given input files.\n\
729 /* xgettext: no-wrap */
731 Mandatory arguments to long options are mandatory for short options too.\n\
732 Similarly for optional arguments.\n\
736 Input file location:\n"));
738 INPUTFILE ... input files\n"));
740 -f, --files-from=FILE get list of input files from FILE\n"));
742 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
744 If input file is -, standard input is read.\n"));
747 Output file location:\n"));
749 -d, --default-domain=NAME use NAME.po for output (instead of messages.po)\n"));
751 -o, --output=FILE write output to specified file\n"));
753 -p, --output-dir=DIR output files will be placed in directory DIR\n"));
755 If output file is -, output is written to standard output.\n"));
758 Choice of input file language:\n"));
760 -L, --language=NAME recognise the specified language\n\
761 (C, C++, ObjectiveC, PO, Shell, Python, Lisp,\n\
762 EmacsLisp, librep, Scheme, Smalltalk, Java,\n\
763 JavaProperties, C#, awk, YCP, Tcl, Perl, PHP,\n\
764 GCC-source, NXStringTable, RST, Glade)\n"));
766 -C, --c++ shorthand for --language=C++\n"));
768 By default the language is guessed depending on the input file name extension.\n"));
771 Input file interpretation:\n"));
773 --from-code=NAME encoding of input files\n\
774 (except for Python, Tcl, Glade)\n"));
776 By default the input files are assumed to be in ASCII.\n"));
779 Operation mode:\n"));
781 -j, --join-existing join messages with existing file\n"));
783 -x, --exclude-file=FILE.po entries from FILE.po are not extracted\n"));
785 -c, --add-comments[=TAG] place comment block with TAG (or those\n\
786 preceding keyword lines) in output file\n"));
789 Language specific options:\n"));
791 -a, --extract-all extract all strings\n"));
793 (only languages C, C++, ObjectiveC, Shell,\n\
794 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
795 C#, awk, Tcl, Perl, PHP, GCC-source, Glade)\n"));
797 -k, --keyword[=WORD] additional keyword to be looked for (without\n\
798 WORD means not to use default keywords)\n"));
800 (only languages C, C++, ObjectiveC, Shell,\n\
801 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
802 C#, awk, Tcl, Perl, PHP, GCC-source, Glade)\n"));
804 --flag=WORD:ARG:FLAG additional flag for strings inside the argument\n\
805 number ARG of keyword WORD\n"));
807 (only languages C, C++, ObjectiveC, Shell,\n\
808 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
809 C#, awk, YCP, Tcl, Perl, PHP, GCC-source)\n"));
811 -T, --trigraphs understand ANSI C trigraphs for input\n"));
813 (only languages C, C++, ObjectiveC)\n"));
815 --qt recognize Qt format strings\n"));
817 (only language C++)\n"));
819 --debug more detailed formatstring recognition result\n"));
822 Output details:\n"));
824 -e, --no-escape do not use C escapes in output (default)\n"));
826 -E, --escape use C escapes in output, no extended chars\n"));
828 --force-po write PO file even if empty\n"));
830 -i, --indent write the .po file using indented style\n"));
832 --no-location do not write '#: filename:line' lines\n"));
834 -n, --add-location generate '#: filename:line' lines (default)\n"));
836 --strict write out strict Uniforum conforming .po file\n"));
838 --properties-output write out a Java .properties file\n"));
840 --stringtable-output write out a NeXTstep/GNUstep .strings file\n"));
842 -w, --width=NUMBER set output page width\n"));
844 --no-wrap do not break long message lines, longer than\n\
845 the output page width, into several lines\n"));
847 -s, --sort-output generate sorted output\n"));
849 -F, --sort-by-file sort output by file location\n"));
851 --omit-header don't write header with `msgid \"\"' entry\n"));
853 --copyright-holder=STRING set copyright holder in output\n"));
855 --foreign-user omit FSF copyright in output for foreign user\n"));
857 --msgid-bugs-address=EMAIL@ADDRESS set report address for msgid bugs\n"));
859 -m, --msgstr-prefix[=STRING] use STRING or \"\" as prefix for msgstr entries\n"));
861 -M, --msgstr-suffix[=STRING] use STRING or \"\" as suffix for msgstr entries\n"));
864 Informative output:\n"));
866 -h, --help display this help and exit\n"));
868 -V, --version output version information and exit\n"));
870 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
/* Domain-directive callback installed in exclude_methods.  It reports a
   grammar error at gram_pos saying that the file may not contain domain
   directives.  Neither POP nor NAME is used in the lines visible in this
   listing.  */
879 exclude_directive_domain (abstract_po_reader_ty
*pop
, char *name
)
881 po_gram_error_at_line (&gram_pos
,
882 _("this file may not contain domain directives"));
/* Message callback installed in exclude_methods.  It works on the global
   exclude list: the list is allocated with message_list_alloc, MSGID is
   looked up with message_list_search, and a message built by
   message_alloc from MSGID, MSGID_PLURAL, an empty msgstr of length 1 and
   MSGSTR_POS is appended with message_list_append.
   NOTE(review): the parameter lines for msgid and msgid_plural, and the
   conditions guarding the allocation and the append, are missing from
   this listing; confirm against the full file.  */
887 exclude_directive_message (abstract_po_reader_ty
*pop
,
889 lex_pos_ty
*msgid_pos
,
891 char *msgstr
, size_t msgstr_len
,
892 lex_pos_ty
*msgstr_pos
,
893 bool force_fuzzy
, bool obsolete
)
897 /* See if this message ID has been seen before. */
899 exclude
= message_list_alloc (true);
900 mp
= message_list_search (exclude
, msgid
);
905 mp
= message_alloc (msgid
, msgid_plural
, "", 1, msgstr_pos
);
906 /* Do not free msgid. */
907 message_list_append (exclude
, mp
);
910 /* All we care about is the msgid. Throw the msgstr away.
911 Don't even check for duplicate msgids. */
916 /* So that the one parser can be used for multiple programs, and also
917 use good data hiding and encapsulation practices, an object
918 oriented approach has been taken. An object instance is allocated,
919 and all actions resulting from the parse will be through
920 invocations of method functions of that object. */
/* Method table handed to po_reader_alloc by read_exclusion_file.  Only
   the two directive callbacks are set; every other slot visible here is
   NULL.  NOTE(review): one initializer line between the message callback
   and the comment_dot slot is missing from this listing.  */
922 static abstract_po_reader_class_ty exclude_methods
=
924 sizeof (abstract_po_reader_ty
),
925 NULL
, /* constructor */
926 NULL
, /* destructor */
927 NULL
, /* parse_brief */
928 NULL
, /* parse_debrief */
929 exclude_directive_domain
,
930 exclude_directive_message
,
932 NULL
, /* comment_dot */
933 NULL
, /* comment_filepos */
934 NULL
, /* comment_special */
/* Read the exclusion file FILENAME (main passes optarg here).
   The file is opened with open_po_file, which also yields
   real_filename.  A reader is allocated on exclude_methods and the
   stream is scanned with po_scan, so the exclude_directive_* callbacks
   see its contents.  The reader is then released with po_reader_free.
   NOTE(review): the declaration of real_filename and any closing of FP
   are not visible in this listing.  */
939 read_exclusion_file (char *filename
)
942 FILE *fp
= open_po_file (filename
, &real_filename
, true);
943 abstract_po_reader_ty
*pop
;
945 pop
= po_reader_alloc (&exclude_methods
);
946 po_scan (pop
, fp
, real_filename
, filename
, input_syntax
);
947 po_reader_free (pop
);
/* Split the keyword specification SPEC, which has one of the forms
   KEYWORD, KEYWORD:ARGNUM1 or KEYWORD:ARGNUM1,ARGNUM2.
   The string is scanned backwards from its end: trailing digits, then
   optionally a comma and more digits, then a colon.  The numbers found
   are converted with strtol (base 10) and stored through ARGNUM1P and
   ARGNUM2P.  In the plain KEYWORD case *ENDP is set to the end of the
   string.
   NOTE(review): the assignments to *ENDP in the two colon cases, and the
   values stored through the ARGNUM pointers when a number is absent, are
   on lines missing from this listing.  */
955 split_keywordspec (const char *spec
,
956 const char **endp
, int *argnum1p
, int *argnum2p
)
960 /* Start parsing from the end. */
961 p
= spec
+ strlen (spec
);
962 if (p
> spec
&& isdigit ((unsigned char) p
[-1]))
964 const char *last_arg
;
968 while (p
> spec
&& isdigit ((unsigned char) p
[-1]));
972 if (p
> spec
&& p
[-1] == ',')
976 if (p
> spec
&& isdigit ((unsigned char) p
[-1]))
978 const char *first_arg
;
982 while (p
> spec
&& isdigit ((unsigned char) p
[-1]));
986 if (p
> spec
&& p
[-1] == ':')
988 /* Parsed "KEYWORD:ARGNUM1,ARGNUM2". */
992 *argnum1p
= strtol (first_arg
, &dummy
, 10);
993 *argnum2p
= strtol (last_arg
, &dummy
, 10);
998 else if (p
> spec
&& p
[-1] == ':')
1000 /* Parsed "KEYWORD:ARGNUM1". */
1004 *argnum1p
= strtol (last_arg
, &dummy
, 10);
1009 /* Parsed "KEYWORD". */
1010 *endp
= p
+ strlen (p
);
/* Null context: both enum members are undecided and both boolean members
   are false.  NOTE(review): the member order is presumably is_format1,
   pass_format1, is_format2, pass_format2; confirm against the declaration
   of flag_context_ty.  */
1017 flag_context_ty null_context
= { undecided
, false, undecided
, false };
1019 /* Transparent context. */
/* Same as null_context except that both boolean members are true.  */
1020 flag_context_ty passthrough_context
= { undecided
, true, undecided
, true };
/* Combine OUTER_CONTEXT with MODIFIER_CONTEXT.
   The result starts as a copy of MODIFIER_CONTEXT.  For each of the two
   format slots whose pass_format flag is set, the is_format value is
   taken from OUTER_CONTEXT instead and the pass_format flag is cleared.
   NOTE(review): the return statement is not visible in this listing;
   presumably result is returned.  */
1024 inherited_context (flag_context_ty outer_context
,
1025 flag_context_ty modifier_context
)
1027 flag_context_ty result
= modifier_context
;
1029 if (result
.pass_format1
)
1031 result
.is_format1
= outer_context
.is_format1
;
1032 result
.pass_format1
= false;
1034 if (result
.pass_format2
)
1036 result
.is_format2
= outer_context
.is_format2
;
1037 result
.pass_format2
= false;
1043 /* Null context list iterator. */
/* Its second member is NULL.  NOTE(review): presumably that member is the
   head pointer, in which case flag_context_list_iterator_advance returns
   null_context for it; confirm against the iterator type.  */
1044 flag_context_list_iterator_ty null_context_list_iterator
= { 1, NULL
};
1046 /* Transparent context list iterator. */
/* The list object below is initialized with its own address.
   NOTE(review): presumably that is its next member, which would make it
   the self-linked node that flag_context_list_iterator_advance special
   cases; confirm against flag_context_list_ty.  */
1047 static flag_context_list_ty passthrough_context_circular_list
=
1050 { undecided
, true, undecided
, true },
1051 &passthrough_context_circular_list
1053 flag_context_list_iterator_ty passthrough_context_list_iterator
=
1056 &passthrough_context_circular_list
/* Build and return an iterator for LIST.
   NOTE(review): the member assignments and the return statement are
   missing from this listing; confirm the initial argnum against the full
   file.  */
1060 flag_context_list_iterator_ty
1061 flag_context_list_iterator (flag_context_list_ty
*list
)
1063 flag_context_list_iterator_ty result
;
/* Return the flag context for the argument ITER currently stands at.
   If the head pointer is NULL the result is null_context.  If the
   iterator argnum equals the argnum of the head element, the flags of
   that element are taken and the head moves to the next element, unless
   the element links to itself (the circular list case).  Otherwise
   null_context is returned.
   NOTE(review): the statements that update iter->argnum and that return
   the taken flags are missing from this listing.  */
1072 flag_context_list_iterator_advance (flag_context_list_iterator_ty
*iter
)
1074 if (iter
->head
== NULL
)
1075 return null_context
;
1076 if (iter
->argnum
== iter
->head
->argnum
)
1078 flag_context_ty result
= iter
->head
->flags
;
1080 /* Special casing of circular list. */
1081 if (iter
->head
!= iter
->head
->next
)
1083 iter
->head
= iter
->head
->next
;
1092 return null_context
;
/* Look up the KEYLEN bytes at KEY in FLAG_TABLE.
   The hash lookup is only attempted when the table has been initialized
   (its table member is not NULL).  When find_entry reports success (0),
   the stored entry is returned as a flag_context_list_ty pointer.
   NOTE(review): the result for a missing key is on a line not visible in
   this listing.  */
1097 flag_context_list_ty
*
1098 flag_context_list_table_lookup (flag_context_list_table_ty
*flag_table
,
1099 const void *key
, size_t keylen
)
1103 if (flag_table
->table
!= NULL
1104 && find_entry (flag_table
, key
, keylen
, &entry
) == 0)
1105 return (flag_context_list_ty
*) entry
;
/* Record in TABLE that argument number ARGNUM of the function named by
   the bytes NAME_START..NAME_END has format flag VALUE and pass-through
   flag PASS.
   The call sites pass one more argument (0 or 1) right after TABLE.  Its
   declaration is missing from this listing, but the comment below calls
   it INDEX, and the body fills either the is_format1 and pass_format1
   members or the is_format2 and pass_format2 members.
   The name is adjusted first.  For flag_table_lisp it is converted to
   upper case in a buffer obtained from xallocsa, which is released with
   freesa at the end.  For flag_table_tcl it is tested for a leading
   double colon.
   The hash table is initialized on first use with init_hash.  A name not
   yet present gets a freshly allocated element that is stored with
   insert_entry.  Otherwise the existing list is walked while its argnum
   is smaller than ARGNUM, and the flag is either merged into an element
   with the same argnum or placed in a newly allocated element.
   NOTE(review): the comment that starts at listing line 1223 has lost its
   closing line, and several braces and statements are missing from this
   listing, so the exact linking of new elements cannot be confirmed from
   this view.  */
1112 flag_context_list_table_insert (flag_context_list_table_ty
*table
,
1114 const char *name_start
, const char *name_end
,
1115 int argnum
, enum is_format value
, bool pass
)
1117 char *allocated_name
= NULL
;
1119 if (table
== &flag_table_lisp
)
1121 /* Convert NAME to upper case. */
1122 size_t name_len
= name_end
- name_start
;
1123 char *name
= allocated_name
= (char *) xallocsa (name_len
);
1126 for (i
= 0; i
< name_len
; i
++)
1127 name
[i
] = (name_start
[i
] >= 'a' && name_start
[i
] <= 'z'
1128 ? name_start
[i
] - 'a' + 'A'
1131 name_end
= name
+ name_len
;
1133 else if (table
== &flag_table_tcl
)
1135 /* Remove redundant "::" prefix. */
1136 if (name_end
- name_start
> 2
1137 && name_start
[0] == ':' && name_start
[1] == ':')
1141 /* Insert the pair (VALUE, PASS) at INDEX in the element numbered ARGNUM
1142 of the list corresponding to NAME in the TABLE. */
1143 if (table
->table
== NULL
)
1144 init_hash (table
, 100);
1148 if (find_entry (table
, name_start
, name_end
- name_start
, &entry
) != 0)
1150 /* Create new hash table entry. */
1151 flag_context_list_ty
*list
=
1152 (flag_context_list_ty
*) xmalloc (sizeof (flag_context_list_ty
));
1153 list
->argnum
= argnum
;
1154 memset (&list
->flags
, '\0', sizeof (list
->flags
));
1158 list
->flags
.is_format1
= value
;
1159 list
->flags
.pass_format1
= pass
;
1162 list
->flags
.is_format2
= value
;
1163 list
->flags
.pass_format2
= pass
;
1169 insert_entry (table
, name_start
, name_end
- name_start
, list
);
1173 flag_context_list_ty
*list
= (flag_context_list_ty
*)entry
;
1174 flag_context_list_ty
**lastp
= NULL
;
1176 while (list
!= NULL
&& list
->argnum
< argnum
)
1178 lastp
= &list
->next
;
1181 if (list
!= NULL
&& list
->argnum
== argnum
)
1183 /* Add this flag to the current argument number. */
1187 list
->flags
.is_format1
= value
;
1188 list
->flags
.pass_format1
= pass
;
1191 list
->flags
.is_format2
= value
;
1192 list
->flags
.pass_format2
= pass
;
1198 else if (lastp
!= NULL
)
1200 /* Add a new list entry for this argument number. */
1202 (flag_context_list_ty
*) xmalloc (sizeof (flag_context_list_ty
));
1203 list
->argnum
= argnum
;
1204 memset (&list
->flags
, '\0', sizeof (list
->flags
));
1208 list
->flags
.is_format1
= value
;
1209 list
->flags
.pass_format1
= pass
;
1212 list
->flags
.is_format2
= value
;
1213 list
->flags
.pass_format2
= pass
;
1218 list
->next
= *lastp
;
1223 /* Add a new list entry for this argument number, at the beginning
1224 of the list. Since we don't have an API for replacing the
1225 value of a key in the hash table, we have to copy the first
1227 flag_context_list_ty
*copy
=
1228 (flag_context_list_ty
*) xmalloc (sizeof (flag_context_list_ty
));
1231 list
->argnum
= argnum
;
1232 memset (&list
->flags
, '\0', sizeof (list
->flags
));
1236 list
->flags
.is_format1
= value
;
1237 list
->flags
.pass_format1
= pass
;
1240 list
->flags
.is_format2
= value
;
1241 list
->flags
.pass_format2
= pass
;
1251 if (allocated_name
!= NULL
)
1252 freesa (allocated_name
);
1257 xgettext_record_flag (const char *optionstring
)
1259 /* Check the string has at least two colons. (Colons in the name are
1260 allowed, needed for the Lisp and the Tcl backends.) */
1264 for (colon2
= optionstring
+ strlen (optionstring
); ; )
1266 if (colon2
== optionstring
)
1272 for (colon1
= colon2
; ; )
1274 if (colon1
== optionstring
)
1281 const char *name_start
= optionstring
;
1282 const char *name_end
= colon1
;
1283 const char *argnum_start
= colon1
+ 1;
1284 const char *argnum_end
= colon2
;
1285 const char *flag
= colon2
+ 1;
1288 /* Check the parts' syntax. */
1289 if (name_end
== name_start
)
1291 if (argnum_end
== argnum_start
)
1295 argnum
= strtol (argnum_start
, &endp
, 10);
1296 if (endp
!= argnum_end
)
1302 /* Analyze the flag part. */
1307 if (strlen (flag
) >= 5 && memcmp (flag
, "pass-", 5) == 0)
1313 /* Unlike po_parse_comment_special(), we don't accept "fuzzy" or "wrap"
1314 here - it makes no sense. */
1315 if (strlen (flag
) >= 7
1316 && memcmp (flag
+ strlen (flag
) - 7, "-format", 7) == 0)
1320 enum is_format value
;
1324 n
= strlen (flag
) - 7;
1326 if (n
>= 3 && memcmp (p
, "no-", 3) == 0)
1332 else if (n
>= 9 && memcmp (p
, "possible-", 9) == 0)
1338 else if (n
>= 11 && memcmp (p
, "impossible-", 11) == 0)
1345 value
= yes_according_to_context
;
1347 for (type
= 0; type
< NFORMATS
; type
++)
1348 if (strlen (format_language
[type
]) == n
1349 && memcmp (format_language
[type
], p
, n
) == 0)
1354 flag_context_list_table_insert (&flag_table_c
, 0,
1355 name_start
, name_end
,
1356 argnum
, value
, pass
);
1357 flag_context_list_table_insert (&flag_table_objc
, 0,
1358 name_start
, name_end
,
1359 argnum
, value
, pass
);
1362 flag_context_list_table_insert (&flag_table_objc
, 1,
1363 name_start
, name_end
,
1364 argnum
, value
, pass
);
1367 flag_context_list_table_insert (&flag_table_sh
, 0,
1368 name_start
, name_end
,
1369 argnum
, value
, pass
);
1372 flag_context_list_table_insert (&flag_table_python
, 0,
1373 name_start
, name_end
,
1374 argnum
, value
, pass
);
1377 flag_context_list_table_insert (&flag_table_lisp
, 0,
1378 name_start
, name_end
,
1379 argnum
, value
, pass
);
1382 flag_context_list_table_insert (&flag_table_elisp
, 0,
1383 name_start
, name_end
,
1384 argnum
, value
, pass
);
1387 flag_context_list_table_insert (&flag_table_librep
, 0,
1388 name_start
, name_end
,
1389 argnum
, value
, pass
);
1392 flag_context_list_table_insert (&flag_table_scheme
, 0,
1393 name_start
, name_end
,
1394 argnum
, value
, pass
);
1396 case format_smalltalk
:
1399 flag_context_list_table_insert (&flag_table_java
, 0,
1400 name_start
, name_end
,
1401 argnum
, value
, pass
);
1404 flag_context_list_table_insert (&flag_table_csharp
, 0,
1405 name_start
, name_end
,
1406 argnum
, value
, pass
);
1409 flag_context_list_table_insert (&flag_table_awk
, 0,
1410 name_start
, name_end
,
1411 argnum
, value
, pass
);
1416 flag_context_list_table_insert (&flag_table_ycp
, 0,
1417 name_start
, name_end
,
1418 argnum
, value
, pass
);
1421 flag_context_list_table_insert (&flag_table_tcl
, 0,
1422 name_start
, name_end
,
1423 argnum
, value
, pass
);
1426 flag_context_list_table_insert (&flag_table_perl
, 0,
1427 name_start
, name_end
,
1428 argnum
, value
, pass
);
1430 case format_perl_brace
:
1431 flag_context_list_table_insert (&flag_table_perl
, 1,
1432 name_start
, name_end
,
1433 argnum
, value
, pass
);
1436 flag_context_list_table_insert (&flag_table_php
, 0,
1437 name_start
, name_end
,
1438 argnum
, value
, pass
);
1440 case format_gcc_internal
:
1441 flag_context_list_table_insert (&flag_table_gcc_internal
, 0,
1442 name_start
, name_end
,
1443 argnum
, value
, pass
);
1446 flag_context_list_table_insert (&flag_table_c
, 0,
1447 name_start
, name_end
,
1448 argnum
, value
, pass
);
1455 /* If the flag is not among the valid values, the optionstring is
1462 error (EXIT_FAILURE
, 0, _("\
1463 A --flag argument doesn't have the <keyword>:<argnum>:[pass-]<flag> syntax: %s"),
1468 static string_list_ty
*comment
;
1471 xgettext_comment_add (const char *str
)
1473 if (comment
== NULL
)
1474 comment
= string_list_alloc ();
1475 string_list_append (comment
, str
);
1479 xgettext_comment (size_t n
)
1481 if (comment
== NULL
|| n
>= comment
->nitems
)
1483 return comment
->item
[n
];
1487 xgettext_comment_reset ()
1489 if (comment
!= NULL
)
1491 string_list_free (comment
);
1497 refcounted_string_list_ty
*savable_comment
;
1500 savable_comment_add (const char *str
)
1502 if (savable_comment
== NULL
)
1505 (refcounted_string_list_ty
*) xmalloc (sizeof (*savable_comment
));
1506 savable_comment
->refcount
= 1;
1507 string_list_init (&savable_comment
->contents
);
1509 else if (savable_comment
->refcount
> 1)
1511 /* Unshare the list by making copies. */
1512 struct string_list_ty
*oldcontents
;
1515 savable_comment
->refcount
--;
1516 oldcontents
= &savable_comment
->contents
;
1519 (refcounted_string_list_ty
*) xmalloc (sizeof (*savable_comment
));
1520 savable_comment
->refcount
= 1;
1521 string_list_init (&savable_comment
->contents
);
1522 for (i
= 0; i
< oldcontents
->nitems
; i
++)
1523 string_list_append (&savable_comment
->contents
, oldcontents
->item
[i
]);
1525 string_list_append (&savable_comment
->contents
, str
);
1529 savable_comment_reset ()
1531 drop_reference (savable_comment
);
1532 savable_comment
= NULL
;
1536 savable_comment_to_xgettext_comment (refcounted_string_list_ty
*rslp
)
1538 xgettext_comment_reset ();
1543 for (i
= 0; i
< rslp
->contents
.nitems
; i
++)
1544 xgettext_comment_add (rslp
->contents
.item
[i
]);
1551 xgettext_open (const char *fn
,
1552 char **logical_file_name_p
, char **real_file_name_p
)
1556 char *logical_file_name
;
1558 if (strcmp (fn
, "-") == 0)
1560 new_name
= xstrdup (_("standard input"));
1561 logical_file_name
= xstrdup (new_name
);
1564 else if (IS_ABSOLUTE_PATH (fn
))
1566 new_name
= xstrdup (fn
);
1567 fp
= fopen (fn
, "r");
1569 error (EXIT_FAILURE
, errno
, _("\
1570 error while opening \"%s\" for reading"), fn
);
1571 logical_file_name
= xstrdup (new_name
);
1579 const char *dir
= dir_list_nth (j
);
1582 error (EXIT_FAILURE
, ENOENT
, _("\
1583 error while opening \"%s\" for reading"), fn
);
1585 new_name
= concatenated_pathname (dir
, fn
, NULL
);
1587 fp
= fopen (new_name
, "r");
1591 if (errno
!= ENOENT
)
1592 error (EXIT_FAILURE
, errno
, _("\
1593 error while opening \"%s\" for reading"), new_name
);
1597 /* Note that the NEW_NAME variable contains the actual file name
1598 and the logical file name is what is reported by xgettext. In
1599 this case NEW_NAME is set to the file which was found along the
1600 directory search path, and LOGICAL_FILE_NAME is set to the
1601 file name which was searched for. */
1602 logical_file_name
= xstrdup (fn
);
1605 *logical_file_name_p
= logical_file_name
;
1606 *real_file_name_p
= new_name
;
1611 /* Language dependent format string parser.
1612 NULL if the language has no notion of format strings. */
1613 static struct formatstring_parser
*current_formatstring_parser1
;
1614 static struct formatstring_parser
*current_formatstring_parser2
;
1618 extract_from_file (const char *file_name
, extractor_ty extractor
,
1619 msgdomain_list_ty
*mdlp
)
1621 char *logical_file_name
;
1622 char *real_file_name
;
1623 FILE *fp
= xgettext_open (file_name
, &logical_file_name
, &real_file_name
);
1625 /* Set the default for the source file encoding. May be overridden by
1626 the extractor function. */
1627 xgettext_current_source_encoding
= xgettext_global_source_encoding
;
1629 xgettext_current_source_iconv
= xgettext_global_source_iconv
;
1632 current_formatstring_parser1
= extractor
.formatstring_parser1
;
1633 current_formatstring_parser2
= extractor
.formatstring_parser2
;
1634 extractor
.func (fp
, real_file_name
, logical_file_name
, extractor
.flag_table
,
1639 free (logical_file_name
);
1640 free (real_file_name
);
1646 /* If we don't have iconv(), the only supported values for
1647 xgettext_global_source_encoding and thus also for
1648 xgettext_current_source_encoding are ASCII and UTF-8.
1649 convert_string() should not be called in this case. */
1650 #define convert_string(cd,string) (abort (), (string))
1653 /* Convert the given string from xgettext_current_source_encoding to
1654 the output file encoding (i.e. ASCII or UTF-8).
1655 The resulting string is either the argument string, or freshly allocated.
1656 The file_name and line_number are only used for error message purposes. */
1658 from_current_source_encoding (const char *string
,
1659 const char *file_name
, size_t line_number
)
1661 if (xgettext_current_source_encoding
== po_charset_ascii
)
1663 if (!is_ascii_string (string
))
1667 if (line_number
== (size_t)(-1))
1670 sprintf (buffer
, ":%ld", (long) line_number
);
1671 multiline_error (xstrdup (""),
1673 Non-ASCII string at %s%s.\n\
1674 Please specify the source encoding through --from-code.\n"),
1675 file_name
, buffer
));
1676 exit (EXIT_FAILURE
);
1679 else if (xgettext_current_source_encoding
!= po_charset_utf8
)
1680 string
= convert_string (xgettext_current_source_iconv
, string
);
1682 return (char *) string
;
1685 #define CONVERT_STRING(string) \
1686 string = from_current_source_encoding (string, pos->file_name, \
1690 /* Update the is_format[] flags depending on the information given in the
1693 set_format_flags_from_context (enum is_format is_format
[NFORMATS
],
1694 flag_context_ty context
, const char *string
,
1695 lex_pos_ty
*pos
, const char *pretty_msgstr
)
1699 if (context
.is_format1
!= undecided
|| context
.is_format2
!= undecided
)
1700 for (i
= 0; i
< NFORMATS
; i
++)
1702 if (is_format
[i
] == undecided
)
1704 if (formatstring_parsers
[i
] == current_formatstring_parser1
1705 && context
.is_format1
!= undecided
)
1706 is_format
[i
] = (enum is_format
) context
.is_format1
;
1707 if (formatstring_parsers
[i
] == current_formatstring_parser2
1708 && context
.is_format2
!= undecided
)
1709 is_format
[i
] = (enum is_format
) context
.is_format2
;
1711 if (possible_format_p (is_format
[i
]))
1713 struct formatstring_parser
*parser
= formatstring_parsers
[i
];
1714 char *invalid_reason
= NULL
;
1715 void *descr
= parser
->parse (string
, false, &invalid_reason
);
1718 parser
->free (descr
);
1721 /* The string is not a valid format string. */
1722 if (is_format
[i
] != possible
)
1726 error_with_progname
= false;
1727 if (pos
->line_number
== (size_t)(-1))
1730 sprintf (buffer
, ":%ld", (long) pos
->line_number
);
1731 multiline_warning (xasprintf (_("%s%s: warning: "),
1732 pos
->file_name
, buffer
),
1733 xasprintf (is_format
[i
] == yes_according_to_context
? _("Although being used in a format string position, the %s is not a valid %s format string. Reason: %s\n") : _("Although declared as such, the %s is not a valid %s format string. Reason: %s\n"),
1735 format_language_pretty
[i
],
1737 error_with_progname
= true;
1740 is_format
[i
] = impossible
;
1741 free (invalid_reason
);
1749 remember_a_message (message_list_ty
*mlp
, char *string
,
1750 flag_context_ty context
, lex_pos_ty
*pos
)
1752 enum is_format is_format
[NFORMATS
];
1753 enum is_wrap do_wrap
;
1761 /* See whether we shall exclude this message. */
1762 if (exclude
!= NULL
&& message_list_search (exclude
, msgid
) != NULL
)
1764 /* Tell the lexer to reset its comment buffer, so that the next
1765 message gets the correct comments. */
1766 xgettext_comment_reset ();
1771 for (i
= 0; i
< NFORMATS
; i
++)
1772 is_format
[i
] = undecided
;
1773 do_wrap
= undecided
;
1775 CONVERT_STRING (msgid
);
1777 if (msgid
[0] == '\0' && !xgettext_omit_header
)
1781 error_with_progname
= false;
1782 if (pos
->line_number
== (size_t)(-1))
1785 sprintf (buffer
, ":%ld", (long) pos
->line_number
);
1786 multiline_warning (xasprintf (_("%s%s: warning: "), pos
->file_name
,
1789 Empty msgid. It is reserved by GNU gettext:\n\
1790 gettext(\"\") returns the header entry with\n\
1791 meta information, not the empty string.\n")));
1792 error_with_progname
= true;
1795 /* See if we have seen this message before. */
1796 mp
= message_list_search (mlp
, msgid
);
1800 for (i
= 0; i
< NFORMATS
; i
++)
1801 is_format
[i
] = mp
->is_format
[i
];
1802 do_wrap
= mp
->do_wrap
;
1806 static lex_pos_ty dummypos
= { __FILE__
, __LINE__
};
1808 /* Construct the msgstr from the prefix and suffix, otherwise use the
1812 msgstr
= (char *) xmalloc (strlen (msgstr_prefix
)
1814 + strlen (msgstr_suffix
) + 1);
1815 stpcpy (stpcpy (stpcpy (msgstr
, msgstr_prefix
), msgid
),
1821 /* Allocate a new message and append the message to the list. */
1822 mp
= message_alloc (msgid
, NULL
, msgstr
, strlen (msgstr
) + 1, &dummypos
);
1823 /* Do not free msgid. */
1824 message_list_append (mlp
, mp
);
1827 /* Determine whether the context specifies that the msgid is a format
1829 set_format_flags_from_context (is_format
, context
, mp
->msgid
, pos
, "msgid");
1831 /* Ask the lexer for the comments it has seen. */
1833 size_t nitems_before
;
1834 size_t nitems_after
;
1836 bool add_all_remaining_comments
;
1838 nitems_before
= (mp
->comment_dot
!= NULL
? mp
->comment_dot
->nitems
: 0);
1840 add_all_remaining_comments
= add_all_comments
;
1843 const char *s
= xgettext_comment (j
);
1850 /* To reduce the possibility of unwanted matches we do a two
1851 step match: the line must contain `xgettext:' and one of
1852 the possible format description strings. */
1853 if ((t
= strstr (s
, "xgettext:")) != NULL
)
1856 enum is_format tmp_format
[NFORMATS
];
1857 enum is_wrap tmp_wrap
;
1860 t
+= strlen ("xgettext:");
1862 po_parse_comment_special (t
, &tmp_fuzzy
, tmp_format
, &tmp_wrap
);
1864 interesting
= false;
1865 for (i
= 0; i
< NFORMATS
; i
++)
1866 if (tmp_format
[i
] != undecided
)
1868 is_format
[i
] = tmp_format
[i
];
1871 if (tmp_wrap
!= undecided
)
1877 /* If the "xgettext:" marker was followed by an interesting
1878 keyword, and we updated our is_format/do_wrap variables,
1879 we don't print the comment as a #. comment. */
1883 /* When the comment tag is seen, it drags in not only the line
1884 which it starts, but all remaining comment lines. */
1885 if (add_all_remaining_comments
1886 || (add_all_remaining_comments
=
1887 (comment_tag
!= NULL
1888 && strncmp (s
, comment_tag
, strlen (comment_tag
)) == 0)))
1889 message_comment_dot_append (mp
, s
);
1892 nitems_after
= (mp
->comment_dot
!= NULL
? mp
->comment_dot
->nitems
: 0);
1894 /* Don't add the comments if they are a repetition of the tail of the
1895 already present comments. This avoids unneeded duplication if the
1896 same message appears several times, each time with the same comment. */
1897 if (nitems_before
< nitems_after
)
1899 size_t added
= nitems_after
- nitems_before
;
1901 if (added
<= nitems_before
)
1903 bool repeated
= true;
1905 for (i
= 0; i
< added
; i
++)
1906 if (strcmp (mp
->comment_dot
->item
[nitems_before
- added
+ i
],
1907 mp
->comment_dot
->item
[nitems_before
+ i
]) != 0)
1915 for (i
= 0; i
< added
; i
++)
1916 free ((char *) mp
->comment_dot
->item
[nitems_before
+ i
]);
1917 mp
->comment_dot
->nitems
= nitems_before
;
1923 /* If it is not already decided, through programmer comments, whether the
1924 msgid is a format string, examine the msgid. This is a heuristic. */
1925 for (i
= 0; i
< NFORMATS
; i
++)
1927 if (is_format
[i
] == undecided
1928 && (formatstring_parsers
[i
] == current_formatstring_parser1
1929 || formatstring_parsers
[i
] == current_formatstring_parser2
)
1930 /* But avoid redundancy: objc-format is stronger than c-format. */
1931 && !(i
== format_c
&& possible_format_p (is_format
[format_objc
]))
1932 && !(i
== format_objc
&& possible_format_p (is_format
[format_c
])))
1934 struct formatstring_parser
*parser
= formatstring_parsers
[i
];
1935 char *invalid_reason
= NULL
;
1936 void *descr
= parser
->parse (mp
->msgid
, false, &invalid_reason
);
1940 /* msgid is a valid format string. We mark only those msgids
1941 as format strings which contain at least one format directive
1942 and thus are format strings with a high probability. We
1943 don't mark strings without directives as format strings,
1944 because that would force the programmer to add
1945 "xgettext: no-c-format" anywhere where a translator wishes
1946 to use a percent sign. So, the msgfmt checking will not be
1947 perfect. Oh well. */
1948 if (parser
->get_number_of_directives (descr
) > 0)
1949 is_format
[i
] = possible
;
1951 parser
->free (descr
);
1955 /* msgid is not a valid format string. */
1956 is_format
[i
] = impossible
;
1957 free (invalid_reason
);
1960 mp
->is_format
[i
] = is_format
[i
];
1963 mp
->do_wrap
= do_wrap
== no
? no
: yes
; /* By default we wrap. */
1965 /* Remember where we saw this msgid. */
1967 message_comment_filepos (mp
, pos
->file_name
, pos
->line_number
);
1969 /* Tell the lexer to reset its comment buffer, so that the next
1970 message gets the correct comments. */
1971 xgettext_comment_reset ();
1978 remember_a_message_plural (message_ty
*mp
, char *string
,
1979 flag_context_ty context
, lex_pos_ty
*pos
)
1987 msgid_plural
= string
;
1989 CONVERT_STRING (msgid_plural
);
1991 /* See if the message is already a plural message. */
1992 if (mp
->msgid_plural
== NULL
)
1994 mp
->msgid_plural
= msgid_plural
;
1996 /* Construct the first plural form from the prefix and suffix,
1997 otherwise use the empty string. The translator will have to
1998 provide additional plural forms. */
2001 msgstr1
= (char *) xmalloc (strlen (msgstr_prefix
)
2002 + strlen (msgid_plural
)
2003 + strlen (msgstr_suffix
) + 1);
2004 stpcpy (stpcpy (stpcpy (msgstr1
, msgstr_prefix
), msgid_plural
),
2009 msgstr1_len
= strlen (msgstr1
) + 1;
2010 msgstr
= (char *) xmalloc (mp
->msgstr_len
+ msgstr1_len
);
2011 memcpy (msgstr
, mp
->msgstr
, mp
->msgstr_len
);
2012 memcpy (msgstr
+ mp
->msgstr_len
, msgstr1
, msgstr1_len
);
2013 mp
->msgstr
= msgstr
;
2014 mp
->msgstr_len
= mp
->msgstr_len
+ msgstr1_len
;
2016 /* Determine whether the context specifies that the msgid_plural is a
2018 set_format_flags_from_context (mp
->is_format
, context
, mp
->msgid_plural
,
2019 pos
, "msgid_plural");
2021 /* If it is not already decided, through programmer comments or
2022 the msgid, whether the msgid is a format string, examine the
2023 msgid_plural. This is a heuristic. */
2024 for (i
= 0; i
< NFORMATS
; i
++)
2025 if ((formatstring_parsers
[i
] == current_formatstring_parser1
2026 || formatstring_parsers
[i
] == current_formatstring_parser2
)
2027 && (mp
->is_format
[i
] == undecided
|| mp
->is_format
[i
] == possible
)
2028 /* But avoid redundancy: objc-format is stronger than c-format. */
2030 && possible_format_p (mp
->is_format
[format_objc
]))
2031 && !(i
== format_objc
2032 && possible_format_p (mp
->is_format
[format_c
])))
2034 struct formatstring_parser
*parser
= formatstring_parsers
[i
];
2035 char *invalid_reason
= NULL
;
2037 parser
->parse (mp
->msgid_plural
, false, &invalid_reason
);
2041 /* Same heuristic as in remember_a_message. */
2042 if (parser
->get_number_of_directives (descr
) > 0)
2043 mp
->is_format
[i
] = possible
;
2045 parser
->free (descr
);
2049 /* msgid_plural is not a valid format string. */
2050 mp
->is_format
[i
] = impossible
;
2051 free (invalid_reason
);
2056 free (msgid_plural
);
2067 static lex_pos_ty pos
= { __FILE__
, __LINE__
};
2069 if (msgid_bugs_address
!= NULL
&& msgid_bugs_address
[0] == '\0')
2070 multiline_warning (xasprintf (_("warning: ")),
2072 The option --msgid-bugs-address was not specified.\n\
2073 If you are using a `Makevars' file, please specify\n\
2074 the MSGID_BUGS_ADDRESS variable there; otherwise please\n\
2075 specify an --msgid-bugs-address command line option.\n\
2079 timestring
= po_strftime (&now
);
2081 msgstr
= xasprintf ("\
2082 Project-Id-Version: PACKAGE VERSION\n\
2083 Report-Msgid-Bugs-To: %s\n\
2084 POT-Creation-Date: %s\n\
2085 PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n\
2086 Last-Translator: FULL NAME <EMAIL@ADDRESS>\n\
2087 Language-Team: LANGUAGE <LL@li.org>\n\
2088 MIME-Version: 1.0\n\
2089 Content-Type: text/plain; charset=CHARSET\n\
2090 Content-Transfer-Encoding: 8bit\n",
2091 msgid_bugs_address
!= NULL
? msgid_bugs_address
: "",
2095 mp
= message_alloc ("", NULL
, msgstr
, strlen (msgstr
) + 1, &pos
);
2097 message_comment_append (mp
,
2098 copyright_holder
[0] != '\0'
2100 SOME DESCRIPTIVE TITLE.\n\
2101 Copyright (C) YEAR %s\n\
2102 This file is distributed under the same license as the PACKAGE package.\n\
2103 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n",
2106 SOME DESCRIPTIVE TITLE.\n\
2107 This file is put in the public domain.\n\
2108 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n");
2110 mp
->is_fuzzy
= true;
2116 finalize_header (msgdomain_list_ty
*mdlp
)
2118 /* If the generated PO file has plural forms, add a Plural-Forms template
2119 to the constructed header. */
2125 for (i
= 0; i
< mdlp
->nitems
; i
++)
2127 message_list_ty
*mlp
= mdlp
->item
[i
]->messages
;
2129 for (j
= 0; j
< mlp
->nitems
; j
++)
2131 message_ty
*mp
= mlp
->item
[j
];
2133 if (mp
->msgid_plural
!= NULL
)
2145 message_ty
*header
= message_list_search (mdlp
->item
[0]->messages
, "");
2147 && strstr (header
->msgstr
, "Plural-Forms:") == NULL
)
2149 size_t insertpos
= strlen (header
->msgstr
);
2154 suffix
= "\nPlural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n";
2155 if (insertpos
== 0 || header
->msgstr
[insertpos
-1] == '\n')
2157 suffix_len
= strlen (suffix
);
2158 new_msgstr
= (char *) xmalloc (header
->msgstr_len
+ suffix_len
);
2159 memcpy (new_msgstr
, header
->msgstr
, insertpos
);
2160 memcpy (new_msgstr
+ insertpos
, suffix
, suffix_len
);
2161 memcpy (new_msgstr
+ insertpos
+ suffix_len
,
2162 header
->msgstr
+ insertpos
,
2163 header
->msgstr_len
- insertpos
);
2164 header
->msgstr
= new_msgstr
;
2165 header
->msgstr_len
= header
->msgstr_len
+ suffix_len
;
2170 /* If not all the strings were plain ASCII, or if the output syntax
2171 requires a charset conversion, set the charset in the header to UTF-8.
2172 All messages have already been converted to UTF-8 in remember_a_message
2173 and remember_a_message_plural. */
2175 bool has_nonascii
= false;
2178 for (i
= 0; i
< mdlp
->nitems
; i
++)
2180 message_list_ty
*mlp
= mdlp
->item
[i
]->messages
;
2182 if (!is_ascii_message_list (mlp
))
2183 has_nonascii
= true;
2187 || output_syntax
== syntax_properties
2188 || output_syntax
== syntax_stringtable
)
2190 message_list_ty
*mlp
= mdlp
->item
[0]->messages
;
2192 iconv_message_list (mlp
, po_charset_utf8
, po_charset_utf8
, NULL
);
2198 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
2199 #define ENDOF(a) ((a) + SIZEOF(a))
2203 language_to_extractor (const char *name
)
2208 extractor_func func
;
2209 flag_context_list_table_ty
*flag_table
;
2210 struct formatstring_parser
*formatstring_parser1
;
2211 struct formatstring_parser
*formatstring_parser2
;
2213 typedef struct table_ty table_ty
;
2215 static table_ty table
[] =
2234 SCANNERS_STRINGTABLE
2237 /* Here may follow more languages and their scanners: pike, etc...
2238 Make sure new scanners honor the --exclude-file option. */
2243 for (tp
= table
; tp
< ENDOF(table
); ++tp
)
2244 if (c_strcasecmp (name
, tp
->name
) == 0)
2246 extractor_ty result
;
2248 result
.func
= tp
->func
;
2249 result
.flag_table
= tp
->flag_table
;
2250 result
.formatstring_parser1
= tp
->formatstring_parser1
;
2251 result
.formatstring_parser2
= tp
->formatstring_parser2
;
2253 /* Handle --qt. It's preferable to handle this facility here rather
2254 than through an option --language=C++/Qt because the latter would
2255 conflict with the language "C++" regarding the file extensions. */
2256 if (recognize_format_qt
&& strcmp (tp
->name
, "C++") == 0)
2257 result
.formatstring_parser2
= &formatstring_qt
;
2262 error (EXIT_FAILURE
, 0, _("language `%s' unknown"), name
);
2265 extractor_ty result
= { NULL
, NULL
, NULL
, NULL
};
2272 extension_to_language (const char *extension
)
2276 const char *extension
;
2277 const char *language
;
2279 typedef struct table_ty table_ty
;
2281 static table_ty table
[] =
2291 EXTENSIONS_SMALLTALK
2293 EXTENSIONS_PROPERTIES
2300 EXTENSIONS_STRINGTABLE
2303 /* Here may follow more file extensions... */
2308 for (tp
= table
; tp
< ENDOF(table
); ++tp
)
2309 if (strcmp (extension
, tp
->extension
) == 0)
2310 return tp
->language
;