Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / xgettext.c
blobeb035f91b5efd7b3971c70555ca8162bef5637c1
1 /* Extracts strings from C source file to Uniforum style .po file.
2 Copyright (C) 1995-1998, 2000-2005 Free Software Foundation, Inc.
3 Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 #include <alloca.h>
24 #include <ctype.h>
25 #include <errno.h>
26 #include <getopt.h>
27 #include <stdio.h>
28 #include <time.h>
29 #include <stdlib.h>
30 #include <stdbool.h>
31 #include <string.h>
32 #include <locale.h>
33 #include <limits.h>
35 #include "xgettext.h"
36 #include "closeout.h"
37 #include "dir-list.h"
38 #include "file-list.h"
39 #include "str-list.h"
40 #include "error.h"
41 #include "error-progname.h"
42 #include "progname.h"
43 #include "relocatable.h"
44 #include "basename.h"
45 #include "xerror.h"
46 #include "xalloc.h"
47 #include "xallocsa.h"
48 #include "strstr.h"
49 #include "xerror.h"
50 #include "exit.h"
51 #include "pathname.h"
52 #include "c-strcase.h"
53 #include "stpcpy.h"
54 #include "open-po.h"
55 #include "read-po-abstract.h"
56 #include "message.h"
57 #include "po-charset.h"
58 #include "msgl-iconv.h"
59 #include "msgl-ascii.h"
60 #include "po-time.h"
61 #include "write-po.h"
62 #include "format.h"
63 #include "gettext.h"
65 /* A convenience macro. I don't like writing gettext() every time. */
66 #define _(str) gettext (str)
69 #ifdef __cplusplus
70 extern "C" {
71 #endif
73 #include "x-c.h"
74 #include "x-po.h"
75 #include "x-sh.h"
76 #include "x-python.h"
77 #include "x-lisp.h"
78 #include "x-elisp.h"
79 #include "x-librep.h"
80 #include "x-scheme.h"
81 #include "x-smalltalk.h"
82 #include "x-java.h"
83 #include "x-properties.h"
84 #include "x-csharp.h"
85 #include "x-awk.h"
86 #include "x-ycp.h"
87 #include "x-tcl.h"
88 #include "x-perl.h"
89 #include "x-php.h"
90 #include "x-stringtable.h"
91 #include "x-rst.h"
92 #include "x-glade.h"
94 #ifdef __cplusplus
96 #endif
99 /* If nonzero add all comments immediately preceding one of the keywords. */
100 static bool add_all_comments = false;
102 /* Tag used in comment of prevailing domain. */
103 static char *comment_tag;
105 /* Name of default domain file. If not set defaults to messages.po. */
106 static const char *default_domain;
108 /* If called with --debug option the output reflects whether format
109 string recognition is done automatically or forced by the user. */
110 static int do_debug;
112 /* Content of .po files with symbols to be excluded. */
113 message_list_ty *exclude;
115 /* Force output of PO file even if empty. */
116 static int force_po;
118 /* Copyright holder of the output file and the translations. */
119 static const char *copyright_holder = "THE PACKAGE'S COPYRIGHT HOLDER";
121 /* Email address or URL for reports of bugs in msgids. */
122 static const char *msgid_bugs_address = NULL;
124 /* String used as prefix for msgstr. */
125 static const char *msgstr_prefix;
127 /* String used as suffix for msgstr. */
128 static const char *msgstr_suffix;
130 /* Directory in which output files are created. */
131 static char *output_dir;
133 /* The output syntax: .pot or .properties or .strings. */
134 static input_syntax_ty output_syntax = syntax_po;
136 /* If nonzero omit header with information about this run. */
137 int xgettext_omit_header;
139 /* Table of flag_context_list_ty tables. */
140 static flag_context_list_table_ty flag_table_c;
141 static flag_context_list_table_ty flag_table_objc;
142 static flag_context_list_table_ty flag_table_gcc_internal;
143 static flag_context_list_table_ty flag_table_sh;
144 static flag_context_list_table_ty flag_table_python;
145 static flag_context_list_table_ty flag_table_lisp;
146 static flag_context_list_table_ty flag_table_elisp;
147 static flag_context_list_table_ty flag_table_librep;
148 static flag_context_list_table_ty flag_table_scheme;
149 static flag_context_list_table_ty flag_table_java;
150 static flag_context_list_table_ty flag_table_csharp;
151 static flag_context_list_table_ty flag_table_awk;
152 static flag_context_list_table_ty flag_table_ycp;
153 static flag_context_list_table_ty flag_table_tcl;
154 static flag_context_list_table_ty flag_table_perl;
155 static flag_context_list_table_ty flag_table_php;
157 /* If true, recognize Qt format strings. */
158 static bool recognize_format_qt;
160 /* Canonicalized encoding name for all input files. */
161 const char *xgettext_global_source_encoding;
163 #if HAVE_ICONV
164 /* Converter from xgettext_global_source_encoding to UTF-8 (except from
165 ASCII or UTF-8, when this conversion is a no-op). */
166 iconv_t xgettext_global_source_iconv;
167 #endif
169 /* Canonicalized encoding name for the current input file. */
170 const char *xgettext_current_source_encoding;
172 #if HAVE_ICONV
173 /* Converter from xgettext_current_source_encoding to UTF-8 (except from
174 ASCII or UTF-8, when this conversion is a no-op). */
175 iconv_t xgettext_current_source_iconv;
176 #endif
178 /* Long options. */
179 static const struct option long_options[] =
181 { "add-comments", optional_argument, NULL, 'c' },
182 { "add-location", no_argument, &line_comment, 1 },
183 { "c++", no_argument, NULL, 'C' },
184 { "copyright-holder", required_argument, NULL, CHAR_MAX + 1 },
185 { "debug", no_argument, &do_debug, 1 },
186 { "default-domain", required_argument, NULL, 'd' },
187 { "directory", required_argument, NULL, 'D' },
188 { "escape", no_argument, NULL, 'E' },
189 { "exclude-file", required_argument, NULL, 'x' },
190 { "extract-all", no_argument, NULL, 'a' },
191 { "files-from", required_argument, NULL, 'f' },
192 { "flag", required_argument, NULL, CHAR_MAX + 8 },
193 { "force-po", no_argument, &force_po, 1 },
194 { "foreign-user", no_argument, NULL, CHAR_MAX + 2 },
195 { "from-code", required_argument, NULL, CHAR_MAX + 3 },
196 { "help", no_argument, NULL, 'h' },
197 { "indent", no_argument, NULL, 'i' },
198 { "join-existing", no_argument, NULL, 'j' },
199 { "keyword", optional_argument, NULL, 'k' },
200 { "language", required_argument, NULL, 'L' },
201 { "msgid-bugs-address", required_argument, NULL, CHAR_MAX + 5 },
202 { "msgstr-prefix", optional_argument, NULL, 'm' },
203 { "msgstr-suffix", optional_argument, NULL, 'M' },
204 { "no-escape", no_argument, NULL, 'e' },
205 { "no-location", no_argument, &line_comment, 0 },
206 { "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
207 { "omit-header", no_argument, &xgettext_omit_header, 1 },
208 { "output", required_argument, NULL, 'o' },
209 { "output-dir", required_argument, NULL, 'p' },
210 { "properties-output", no_argument, NULL, CHAR_MAX + 6 },
211 { "qt", no_argument, NULL, CHAR_MAX + 9 },
212 { "sort-by-file", no_argument, NULL, 'F' },
213 { "sort-output", no_argument, NULL, 's' },
214 { "strict", no_argument, NULL, 'S' },
215 { "string-limit", required_argument, NULL, 'l' },
216 { "stringtable-output", no_argument, NULL, CHAR_MAX + 7 },
217 { "trigraphs", no_argument, NULL, 'T' },
218 { "version", no_argument, NULL, 'V' },
219 { "width", required_argument, NULL, 'w', },
220 { NULL, 0, NULL, 0 }
224 /* The extractors must all be functions returning void and taking three
225 arguments designating the input stream and one message domain list argument
226 in which to add the messages. */
227 typedef void (*extractor_func) (FILE *fp, const char *real_filename,
228 const char *logical_filename,
229 flag_context_list_table_ty *flag_table,
230 msgdomain_list_ty *mdlp);
232 typedef struct extractor_ty extractor_ty;
233 struct extractor_ty
235 extractor_func func;
236 flag_context_list_table_ty *flag_table;
237 struct formatstring_parser *formatstring_parser1;
238 struct formatstring_parser *formatstring_parser2;
242 /* Forward declaration of local functions. */
243 static void usage (int status)
244 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ > 4) || __GNUC__ > 2)
245 __attribute__ ((noreturn))
246 #endif
248 static void read_exclusion_file (char *file_name);
249 static void extract_from_file (const char *file_name, extractor_ty extractor,
250 msgdomain_list_ty *mdlp);
251 static message_ty *construct_header (void);
252 static void finalize_header (msgdomain_list_ty *mdlp);
253 static extractor_ty language_to_extractor (const char *name);
254 static const char *extension_to_language (const char *extension);
258 main (int argc, char *argv[])
260 int cnt;
261 int optchar;
262 bool do_help = false;
263 bool do_version = false;
264 msgdomain_list_ty *mdlp;
265 bool join_existing = false;
266 bool no_default_keywords = false;
267 bool some_additional_keywords = false;
268 bool sort_by_msgid = false;
269 bool sort_by_filepos = false;
270 const char *file_name;
271 const char *files_from = NULL;
272 string_list_ty *file_list;
273 char *output_file = NULL;
274 const char *language = NULL;
275 extractor_ty extractor = { NULL, NULL, NULL, NULL };
277 /* Set program name for messages. */
278 set_program_name (argv[0]);
279 error_print_progname = maybe_print_progname;
281 #ifdef HAVE_SETLOCALE
282 /* Set locale via LC_ALL. */
283 setlocale (LC_ALL, "");
284 #endif
286 /* Set the text message domain. */
287 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
288 textdomain (PACKAGE);
290 /* Ensure that write errors on stdout are detected. */
291 atexit (close_stdout);
293 /* Set initial value of variables. */
294 default_domain = MESSAGE_DOMAIN_DEFAULT;
295 xgettext_global_source_encoding = po_charset_ascii;
296 init_flag_table_c ();
297 init_flag_table_objc ();
298 init_flag_table_gcc_internal ();
299 init_flag_table_sh ();
300 init_flag_table_python ();
301 init_flag_table_lisp ();
302 init_flag_table_elisp ();
303 init_flag_table_librep ();
304 init_flag_table_scheme ();
305 init_flag_table_java ();
306 init_flag_table_csharp ();
307 init_flag_table_awk ();
308 init_flag_table_ycp ();
309 init_flag_table_tcl ();
310 init_flag_table_perl ();
311 init_flag_table_php ();
313 while ((optchar = getopt_long (argc, argv,
314 "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:x:",
315 long_options, NULL)) != EOF)
316 switch (optchar)
318 case '\0': /* Long option. */
319 break;
320 case 'a':
321 x_c_extract_all ();
322 x_sh_extract_all ();
323 x_python_extract_all ();
324 x_lisp_extract_all ();
325 x_elisp_extract_all ();
326 x_librep_extract_all ();
327 x_scheme_extract_all ();
328 x_java_extract_all ();
329 x_csharp_extract_all ();
330 x_awk_extract_all ();
331 x_tcl_extract_all ();
332 x_perl_extract_all ();
333 x_php_extract_all ();
334 x_glade_extract_all ();
335 break;
336 case 'c':
337 if (optarg == NULL)
339 add_all_comments = true;
340 comment_tag = NULL;
342 else
344 add_all_comments = false;
345 comment_tag = optarg;
346 /* We ignore leading white space. */
347 while (isspace ((unsigned char) *comment_tag))
348 ++comment_tag;
350 break;
351 case 'C':
352 language = "C++";
353 break;
354 case 'd':
355 default_domain = optarg;
356 break;
357 case 'D':
358 dir_list_append (optarg);
359 break;
360 case 'e':
361 message_print_style_escape (false);
362 break;
363 case 'E':
364 message_print_style_escape (true);
365 break;
366 case 'f':
367 files_from = optarg;
368 break;
369 case 'F':
370 sort_by_filepos = true;
371 break;
372 case 'h':
373 do_help = true;
374 break;
375 case 'i':
376 message_print_style_indent ();
377 break;
378 case 'j':
379 join_existing = true;
380 break;
381 case 'k':
382 if (optarg == NULL || *optarg != '\0')
384 x_c_keyword (optarg);
385 x_objc_keyword (optarg);
386 x_sh_keyword (optarg);
387 x_python_keyword (optarg);
388 x_lisp_keyword (optarg);
389 x_elisp_keyword (optarg);
390 x_librep_keyword (optarg);
391 x_scheme_keyword (optarg);
392 x_java_keyword (optarg);
393 x_csharp_keyword (optarg);
394 x_awk_keyword (optarg);
395 x_tcl_keyword (optarg);
396 x_perl_keyword (optarg);
397 x_php_keyword (optarg);
398 x_glade_keyword (optarg);
399 if (optarg == NULL)
400 no_default_keywords = true;
401 else
402 some_additional_keywords = true;
404 break;
405 case 'l':
406 /* Accepted for backward compatibility with 0.10.35. */
407 break;
408 case 'L':
409 language = optarg;
410 break;
411 case 'm':
412 /* -m takes an optional argument. If none is given "" is assumed. */
413 msgstr_prefix = optarg == NULL ? "" : optarg;
414 break;
415 case 'M':
416 /* -M takes an optional argument. If none is given "" is assumed. */
417 msgstr_suffix = optarg == NULL ? "" : optarg;
418 break;
419 case 'n':
420 line_comment = 1;
421 break;
422 case 'o':
423 output_file = optarg;
424 break;
425 case 'p':
427 size_t len = strlen (optarg);
429 if (output_dir != NULL)
430 free (output_dir);
432 if (optarg[len - 1] == '/')
433 output_dir = xstrdup (optarg);
434 else
435 output_dir = xasprintf ("%s/", optarg);
437 break;
438 case 's':
439 sort_by_msgid = true;
440 break;
441 case 'S':
442 message_print_style_uniforum ();
443 break;
444 case 'T':
445 x_c_trigraphs ();
446 break;
447 case 'V':
448 do_version = true;
449 break;
450 case 'w':
452 int value;
453 char *endp;
454 value = strtol (optarg, &endp, 10);
455 if (endp != optarg)
456 message_page_width_set (value);
458 break;
459 case 'x':
460 read_exclusion_file (optarg);
461 break;
462 case CHAR_MAX + 1: /* --copyright-holder */
463 copyright_holder = optarg;
464 break;
465 case CHAR_MAX + 2: /* --foreign-user */
466 copyright_holder = "";
467 break;
468 case CHAR_MAX + 3: /* --from-code */
469 xgettext_global_source_encoding = po_charset_canonicalize (optarg);
470 if (xgettext_global_source_encoding == NULL)
471 xgettext_global_source_encoding = po_charset_ascii;
472 break;
473 case CHAR_MAX + 4: /* --no-wrap */
474 message_page_width_ignore ();
475 break;
476 case CHAR_MAX + 5: /* --msgid-bugs-address */
477 msgid_bugs_address = optarg;
478 break;
479 case CHAR_MAX + 6: /* --properties-output */
480 message_print_syntax_properties ();
481 output_syntax = syntax_properties;
482 break;
483 case CHAR_MAX + 7: /* --stringtable-output */
484 message_print_syntax_stringtable ();
485 output_syntax = syntax_stringtable;
486 break;
487 case CHAR_MAX + 8: /* --flag */
488 xgettext_record_flag (optarg);
489 break;
490 case CHAR_MAX + 9: /* --qt */
491 recognize_format_qt = true;
492 break;
493 default:
494 usage (EXIT_FAILURE);
495 /* NOTREACHED */
498 /* Version information requested. */
499 if (do_version)
501 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
502 /* xgettext: no-wrap */
503 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
504 This is free software; see the source for copying conditions. There is NO\n\
505 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
507 "1995-1998, 2000-2005");
508 printf (_("Written by %s.\n"), "Ulrich Drepper");
509 exit (EXIT_SUCCESS);
512 /* Help is requested. */
513 if (do_help)
514 usage (EXIT_SUCCESS);
516 /* Verify selected options. */
517 if (!line_comment && sort_by_filepos)
518 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
519 "--no-location", "--sort-by-file");
521 if (sort_by_msgid && sort_by_filepos)
522 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
523 "--sort-output", "--sort-by-file");
525 if (join_existing && strcmp (default_domain, "-") == 0)
526 error (EXIT_FAILURE, 0, _("\
527 --join-existing cannot be used when output is written to stdout"));
529 if (no_default_keywords && !some_additional_keywords)
531 error (0, 0, _("\
532 xgettext cannot work without keywords to look for"));
533 usage (EXIT_FAILURE);
536 /* Test whether we have some input files given. */
537 if (files_from == NULL && optind >= argc)
539 error (EXIT_SUCCESS, 0, _("no input file given"));
540 usage (EXIT_FAILURE);
543 /* Determine extractor from language. */
544 if (language != NULL)
545 extractor = language_to_extractor (language);
547 /* Canonize msgstr prefix/suffix. */
548 if (msgstr_prefix != NULL && msgstr_suffix == NULL)
549 msgstr_suffix = "";
550 else if (msgstr_prefix == NULL && msgstr_suffix != NULL)
551 msgstr_prefix = "";
553 /* Default output directory is the current directory. */
554 if (output_dir == NULL)
555 output_dir = ".";
557 /* Construct the name of the output file. If the default domain has
558 the special name "-" we write to stdout. */
559 if (output_file)
561 if (IS_ABSOLUTE_PATH (output_file) || strcmp (output_file, "-") == 0)
562 file_name = xstrdup (output_file);
563 else
564 /* Please do NOT add a .po suffix! */
565 file_name = concatenated_pathname (output_dir, output_file, NULL);
567 else if (strcmp (default_domain, "-") == 0)
568 file_name = "-";
569 else
570 file_name = concatenated_pathname (output_dir, default_domain, ".po");
572 /* Determine list of files we have to process. */
573 if (files_from != NULL)
574 file_list = read_names_from_file (files_from);
575 else
576 file_list = string_list_alloc ();
577 /* Append names from command line. */
578 for (cnt = optind; cnt < argc; ++cnt)
579 string_list_append_unique (file_list, argv[cnt]);
581 /* Allocate converter from xgettext_global_source_encoding to UTF-8 (except
582 from ASCII or UTF-8, when this conversion is a no-op). */
583 if (xgettext_global_source_encoding != po_charset_ascii
584 && xgettext_global_source_encoding != po_charset_utf8)
586 #if HAVE_ICONV
587 iconv_t cd;
589 /* Avoid glibc-2.1 bug with EUC-KR. */
590 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
591 if (strcmp (xgettext_global_source_encoding, "EUC-KR") == 0)
592 cd = (iconv_t)(-1);
593 else
594 # endif
595 cd = iconv_open (po_charset_utf8, xgettext_global_source_encoding);
596 if (cd == (iconv_t)(-1))
597 error (EXIT_FAILURE, 0, _("\
598 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
599 and iconv() does not support this conversion."),
600 xgettext_global_source_encoding, po_charset_utf8,
601 basename (program_name));
602 xgettext_global_source_iconv = cd;
603 #else
604 error (EXIT_FAILURE, 0, _("\
605 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
606 This version was built without iconv()."),
607 xgettext_global_source_encoding, po_charset_utf8,
608 basename (program_name));
609 #endif
612 /* Allocate a message list to remember all the messages. */
613 mdlp = msgdomain_list_alloc (true);
615 /* Generate a header, so that we know how and when this PO file was
616 created. */
617 if (!xgettext_omit_header)
618 message_list_append (mdlp->item[0]->messages, construct_header ());
620 /* Read in the old messages, so that we can add to them. */
621 if (join_existing)
623 /* Temporarily reset the directory list to empty, because file_name
624 is an output file and therefore should not be searched for. */
625 void *saved_directory_list = dir_list_save_reset ();
626 extractor_ty po_extractor = { extract_po, NULL, NULL, NULL };
628 extract_from_file (file_name, po_extractor, mdlp);
629 if (!is_ascii_msgdomain_list (mdlp))
630 mdlp = iconv_msgdomain_list (mdlp, "UTF-8", file_name);
632 dir_list_restore (saved_directory_list);
635 /* Process all input files. */
636 for (cnt = 0; cnt < file_list->nitems; ++cnt)
638 const char *filename;
639 extractor_ty this_file_extractor;
641 filename = file_list->item[cnt];
643 if (extractor.func)
644 this_file_extractor = extractor;
645 else
647 const char *base;
648 char *reduced;
649 const char *extension;
650 const char *language;
652 base = strrchr (filename, '/');
653 if (!base)
654 base = filename;
656 reduced = xstrdup (base);
657 /* Remove a trailing ".in" - it's a generic suffix. */
658 if (strlen (reduced) >= 3
659 && memcmp (reduced + strlen (reduced) - 3, ".in", 3) == 0)
660 reduced[strlen (reduced) - 3] = '\0';
662 /* Work out what the file extension is. */
663 extension = strrchr (reduced, '.');
664 if (extension)
665 ++extension;
666 else
667 extension = "";
669 /* Derive the language from the extension, and the extractor
670 function from the language. */
671 language = extension_to_language (extension);
672 if (language == NULL)
674 error (0, 0, _("\
675 warning: file `%s' extension `%s' is unknown; will try C"), filename, extension);
676 language = "C";
678 this_file_extractor = language_to_extractor (language);
680 free (reduced);
683 /* Extract the strings from the file. */
684 extract_from_file (filename, this_file_extractor, mdlp);
686 string_list_free (file_list);
688 /* Finalize the constructed header. */
689 if (!xgettext_omit_header)
690 finalize_header (mdlp);
692 /* Free the allocated converter. */
693 #if HAVE_ICONV
694 if (xgettext_global_source_encoding != po_charset_ascii
695 && xgettext_global_source_encoding != po_charset_utf8)
696 iconv_close (xgettext_global_source_iconv);
697 #endif
699 /* Sorting the list of messages. */
700 if (sort_by_filepos)
701 msgdomain_list_sort_by_filepos (mdlp);
702 else if (sort_by_msgid)
703 msgdomain_list_sort_by_msgid (mdlp);
705 /* Write the PO file. */
706 msgdomain_list_print (mdlp, file_name, force_po, do_debug);
708 exit (EXIT_SUCCESS);
712 /* Display usage information and exit with STATUS.  When STATUS is not
   EXIT_SUCCESS only a one-line hint pointing at --help is written to
   stderr; otherwise the full option summary is written to stdout.  */
713 static void
714 usage (int status)
716 if (status != EXIT_SUCCESS)
717 fprintf (stderr, _("Try `%s --help' for more information.\n"),
718 program_name);
719 else
721 printf (_("\
722 Usage: %s [OPTION] [INPUTFILE]...\n\
723 "), program_name);
724 printf ("\n");
725 printf (_("\
726 Extract translatable strings from given input files.\n\
727 "));
728 printf ("\n");
729 /* xgettext: no-wrap */
730 printf (_("\
731 Mandatory arguments to long options are mandatory for short options too.\n\
732 Similarly for optional arguments.\n\
733 "));
734 printf ("\n");
735 printf (_("\
736 Input file location:\n"));
737 printf (_("\
738 INPUTFILE ... input files\n"));
739 printf (_("\
740 -f, --files-from=FILE get list of input files from FILE\n"));
741 printf (_("\
742 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
743 printf (_("\
744 If input file is -, standard input is read.\n"));
745 printf ("\n");
746 printf (_("\
747 Output file location:\n"));
748 printf (_("\
749 -d, --default-domain=NAME use NAME.po for output (instead of messages.po)\n"));
750 printf (_("\
751 -o, --output=FILE write output to specified file\n"));
752 printf (_("\
753 -p, --output-dir=DIR output files will be placed in directory DIR\n"));
754 printf (_("\
755 If output file is -, output is written to standard output.\n"));
756 printf ("\n");
757 printf (_("\
758 Choice of input file language:\n"));
759 printf (_("\
760 -L, --language=NAME recognise the specified language\n\
761 (C, C++, ObjectiveC, PO, Shell, Python, Lisp,\n\
762 EmacsLisp, librep, Scheme, Smalltalk, Java,\n\
763 JavaProperties, C#, awk, YCP, Tcl, Perl, PHP,\n\
764 GCC-source, NXStringTable, RST, Glade)\n"));
765 printf (_("\
766 -C, --c++ shorthand for --language=C++\n"));
767 printf (_("\
768 By default the language is guessed depending on the input file name extension.\n"));
769 printf ("\n");
770 printf (_("\
771 Input file interpretation:\n"));
772 printf (_("\
773 --from-code=NAME encoding of input files\n\
774 (except for Python, Tcl, Glade)\n"));
775 printf (_("\
776 By default the input files are assumed to be in ASCII.\n"));
777 printf ("\n");
778 printf (_("\
779 Operation mode:\n"));
780 printf (_("\
781 -j, --join-existing join messages with existing file\n"));
782 printf (_("\
783 -x, --exclude-file=FILE.po entries from FILE.po are not extracted\n"));
784 printf (_("\
785 -c, --add-comments[=TAG] place comment block with TAG (or those\n\
786 preceding keyword lines) in output file\n"));
787 printf ("\n");
788 printf (_("\
789 Language specific options:\n"));
790 printf (_("\
791 -a, --extract-all extract all strings\n"));
792 printf (_("\
793 (only languages C, C++, ObjectiveC, Shell,\n\
794 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
795 C#, awk, Tcl, Perl, PHP, GCC-source, Glade)\n"));
796 printf (_("\
797 -k, --keyword[=WORD] additional keyword to be looked for (without\n\
798 WORD means not to use default keywords)\n"));
799 printf (_("\
800 (only languages C, C++, ObjectiveC, Shell,\n\
801 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
802 C#, awk, Tcl, Perl, PHP, GCC-source, Glade)\n"));
803 printf (_("\
804 --flag=WORD:ARG:FLAG additional flag for strings inside the argument\n\
805 number ARG of keyword WORD\n"));
806 printf (_("\
807 (only languages C, C++, ObjectiveC, Shell,\n\
808 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
809 C#, awk, YCP, Tcl, Perl, PHP, GCC-source)\n"));
810 printf (_("\
811 -T, --trigraphs understand ANSI C trigraphs for input\n"));
812 printf (_("\
813 (only languages C, C++, ObjectiveC)\n"));
814 printf (_("\
815 --qt recognize Qt format strings\n"));
816 printf (_("\
817 (only language C++)\n"));
818 printf (_("\
819 --debug more detailed formatstring recognition result\n"));
820 printf ("\n");
821 printf (_("\
822 Output details:\n"));
823 printf (_("\
824 -e, --no-escape do not use C escapes in output (default)\n"));
825 printf (_("\
826 -E, --escape use C escapes in output, no extended chars\n"));
827 printf (_("\
828 --force-po write PO file even if empty\n"));
829 printf (_("\
830 -i, --indent write the .po file using indented style\n"));
831 printf (_("\
832 --no-location do not write '#: filename:line' lines\n"));
833 printf (_("\
834 -n, --add-location generate '#: filename:line' lines (default)\n"));
835 printf (_("\
836 --strict write out strict Uniforum conforming .po file\n"));
837 printf (_("\
838 --properties-output write out a Java .properties file\n"));
839 printf (_("\
840 --stringtable-output write out a NeXTstep/GNUstep .strings file\n"));
841 printf (_("\
842 -w, --width=NUMBER set output page width\n"));
843 printf (_("\
844 --no-wrap do not break long message lines, longer than\n\
845 the output page width, into several lines\n"));
846 printf (_("\
847 -s, --sort-output generate sorted output\n"));
848 printf (_("\
849 -F, --sort-by-file sort output by file location\n"));
850 printf (_("\
851 --omit-header don't write header with `msgid \"\"' entry\n"));
852 printf (_("\
853 --copyright-holder=STRING set copyright holder in output\n"));
854 printf (_("\
855 --foreign-user omit FSF copyright in output for foreign user\n"));
856 printf (_("\
857 --msgid-bugs-address=EMAIL@ADDRESS set report address for msgid bugs\n"));
858 printf (_("\
859 -m, --msgstr-prefix[=STRING] use STRING or \"\" as prefix for msgstr entries\n"));
860 printf (_("\
861 -M, --msgstr-suffix[=STRING] use STRING or \"\" as suffix for msgstr entries\n"));
862 printf ("\n");
863 printf (_("\
864 Informative output:\n"));
865 printf (_("\
866 -h, --help display this help and exit\n"));
867 printf (_("\
868 -V, --version output version information and exit\n"));
869 printf ("\n");
870 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
871 stdout);
/* Both branches end here, so the function never returns to its caller;
   this matches the noreturn attribute on its forward declaration.  */
874 exit (status);
878 static void
879 exclude_directive_domain (abstract_po_reader_ty *pop, char *name)
881 po_gram_error_at_line (&gram_pos,
882 _("this file may not contain domain directives"));
886 static void
887 exclude_directive_message (abstract_po_reader_ty *pop,
888 char *msgid,
889 lex_pos_ty *msgid_pos,
890 char *msgid_plural,
891 char *msgstr, size_t msgstr_len,
892 lex_pos_ty *msgstr_pos,
893 bool force_fuzzy, bool obsolete)
895 message_ty *mp;
897 /* See if this message ID has been seen before. */
898 if (exclude == NULL)
899 exclude = message_list_alloc (true);
900 mp = message_list_search (exclude, msgid);
901 if (mp != NULL)
902 free (msgid);
903 else
905 mp = message_alloc (msgid, msgid_plural, "", 1, msgstr_pos);
906 /* Do not free msgid. */
907 message_list_append (exclude, mp);
910 /* All we care about is the msgid. Throw the msgstr away.
911 Don't even check for duplicate msgids. */
912 free (msgstr);
916 /* So that the one parser can be used for multiple programs, and also
917 use good data hiding and encapsulation practices, an object
918 oriented approach has been taken. An object instance is allocated,
919 and all actions resulting from the parse will be through
920 invocations of method functions of that object. */
922 static abstract_po_reader_class_ty exclude_methods =
924 sizeof (abstract_po_reader_ty),
925 NULL, /* constructor */
926 NULL, /* destructor */
927 NULL, /* parse_brief */
928 NULL, /* parse_debrief */
929 exclude_directive_domain,
930 exclude_directive_message,
931 NULL, /* comment */
932 NULL, /* comment_dot */
933 NULL, /* comment_filepos */
934 NULL, /* comment_special */
938 static void
939 read_exclusion_file (char *filename)
941 char *real_filename;
942 FILE *fp = open_po_file (filename, &real_filename, true);
943 abstract_po_reader_ty *pop;
945 pop = po_reader_alloc (&exclude_methods);
946 po_scan (pop, fp, real_filename, filename, input_syntax);
947 po_reader_free (pop);
949 if (fp != stdin)
950 fclose (fp);
/* Return the start of the run of decimal digits that ends just before P,
   never going below SPEC.  Returns P itself if no digit precedes it.  */
static const char *
keywordspec_digits_start (const char *spec, const char *p)
{
  while (p > spec && isdigit ((unsigned char) p[-1]))
    p--;
  return p;
}

/* Split the keyword specification SPEC, parsing it from its end.

   "KEYWORD:ARGNUM1,ARGNUM2"  stores both numbers,
   "KEYWORD:ARGNUM1"          stores ARGNUM1 and 0,
   anything else              is taken as a plain "KEYWORD" and stores 0, 0.

   *ENDP receives the end of the keyword part: the position of the ':' in
   the first two forms, the end of SPEC otherwise.  */
void
split_keywordspec (const char *spec,
                   const char **endp, int *argnum1p, int *argnum2p)
{
  const char *spec_end = spec + strlen (spec);
  const char *keyword_end = spec_end;
  int argnum1 = 0;
  int argnum2 = 0;
  const char *last_arg = keywordspec_digits_start (spec, spec_end);

  /* There must be trailing digits, and something in front of them.  */
  if (last_arg < spec_end && last_arg > spec)
    {
      if (last_arg[-1] == ':')
        {
          /* Parsed "KEYWORD:ARGNUM1".  */
          keyword_end = last_arg - 1;
          argnum1 = strtol (last_arg, NULL, 10);
        }
      else if (last_arg[-1] == ',')
        {
          const char *comma = last_arg - 1;
          const char *first_arg = keywordspec_digits_start (spec, comma);

          if (first_arg < comma && first_arg > spec && first_arg[-1] == ':')
            {
              /* Parsed "KEYWORD:ARGNUM1,ARGNUM2".  */
              keyword_end = first_arg - 1;
              argnum1 = strtol (first_arg, NULL, 10);
              argnum2 = strtol (last_arg, NULL, 10);
            }
        }
    }

  *endp = keyword_end;
  *argnum1p = argnum1;
  *argnum2p = argnum2;
}
1016 /* Null context. */
1017 flag_context_ty null_context = { undecided, false, undecided, false };
1019 /* Transparent context. */
1020 flag_context_ty passthrough_context = { undecided, true, undecided, true };
1023 flag_context_ty
1024 inherited_context (flag_context_ty outer_context,
1025 flag_context_ty modifier_context)
1027 flag_context_ty result = modifier_context;
1029 if (result.pass_format1)
1031 result.is_format1 = outer_context.is_format1;
1032 result.pass_format1 = false;
1034 if (result.pass_format2)
1036 result.is_format2 = outer_context.is_format2;
1037 result.pass_format2 = false;
1039 return result;
1043 /* Null context list iterator.  Its list head is NULL, so
   flag_context_list_iterator_advance returns null_context for every
   argument. */
1044 flag_context_list_iterator_ty null_context_list_iterator = { 1, NULL };
1046 /* Transparent context list iterator.  It is backed by a single list
   element whose successor is the element itself.
   flag_context_list_iterator_advance recognizes that self-reference and
   does not step past the element, so - as long as the iterator's argument
   number matches the element's - the same transparent flags are returned
   on every call. */
1047 static flag_context_list_ty passthrough_context_circular_list =
1050 { undecided, true, undecided, true },
1051 &passthrough_context_circular_list
1053 flag_context_list_iterator_ty passthrough_context_list_iterator =
1056 &passthrough_context_circular_list
1060 flag_context_list_iterator_ty
1061 flag_context_list_iterator (flag_context_list_ty *list)
1063 flag_context_list_iterator_ty result;
1065 result.argnum = 1;
1066 result.head = list;
1067 return result;
1071 flag_context_ty
1072 flag_context_list_iterator_advance (flag_context_list_iterator_ty *iter)
1074 if (iter->head == NULL)
1075 return null_context;
1076 if (iter->argnum == iter->head->argnum)
1078 flag_context_ty result = iter->head->flags;
1080 /* Special casing of circular list. */
1081 if (iter->head != iter->head->next)
1083 iter->head = iter->head->next;
1084 iter->argnum++;
1087 return result;
1089 else
1091 iter->argnum++;
1092 return null_context;
1097 flag_context_list_ty *
1098 flag_context_list_table_lookup (flag_context_list_table_ty *flag_table,
1099 const void *key, size_t keylen)
1101 void *entry;
1103 if (flag_table->table != NULL
1104 && find_entry (flag_table, key, keylen, &entry) == 0)
1105 return (flag_context_list_ty *) entry;
1106 else
1107 return NULL;
1111 static void
1112 flag_context_list_table_insert (flag_context_list_table_ty *table,
1113 unsigned int index,
1114 const char *name_start, const char *name_end,
1115 int argnum, enum is_format value, bool pass)
1117 char *allocated_name = NULL;
1119 if (table == &flag_table_lisp)
1121 /* Convert NAME to upper case. */
1122 size_t name_len = name_end - name_start;
1123 char *name = allocated_name = (char *) xallocsa (name_len);
1124 size_t i;
1126 for (i = 0; i < name_len; i++)
1127 name[i] = (name_start[i] >= 'a' && name_start[i] <= 'z'
1128 ? name_start[i] - 'a' + 'A'
1129 : name_start[i]);
1130 name_start = name;
1131 name_end = name + name_len;
1133 else if (table == &flag_table_tcl)
1135 /* Remove redundant "::" prefix. */
1136 if (name_end - name_start > 2
1137 && name_start[0] == ':' && name_start[1] == ':')
1138 name_start += 2;
1141 /* Insert the pair (VALUE, PASS) at INDEX in the element numbered ARGNUM
1142 of the list corresponding to NAME in the TABLE. */
1143 if (table->table == NULL)
1144 init_hash (table, 100);
1146 void *entry;
1148 if (find_entry (table, name_start, name_end - name_start, &entry) != 0)
1150 /* Create new hash table entry. */
1151 flag_context_list_ty *list =
1152 (flag_context_list_ty *) xmalloc (sizeof (flag_context_list_ty));
1153 list->argnum = argnum;
1154 memset (&list->flags, '\0', sizeof (list->flags));
1155 switch (index)
1157 case 0:
1158 list->flags.is_format1 = value;
1159 list->flags.pass_format1 = pass;
1160 break;
1161 case 1:
1162 list->flags.is_format2 = value;
1163 list->flags.pass_format2 = pass;
1164 break;
1165 default:
1166 abort ();
1168 list->next = NULL;
1169 insert_entry (table, name_start, name_end - name_start, list);
1171 else
1173 flag_context_list_ty *list = (flag_context_list_ty *)entry;
1174 flag_context_list_ty **lastp = NULL;
1176 while (list != NULL && list->argnum < argnum)
1178 lastp = &list->next;
1179 list = *lastp;
1181 if (list != NULL && list->argnum == argnum)
1183 /* Add this flag to the current argument number. */
1184 switch (index)
1186 case 0:
1187 list->flags.is_format1 = value;
1188 list->flags.pass_format1 = pass;
1189 break;
1190 case 1:
1191 list->flags.is_format2 = value;
1192 list->flags.pass_format2 = pass;
1193 break;
1194 default:
1195 abort ();
1198 else if (lastp != NULL)
1200 /* Add a new list entry for this argument number. */
1201 list =
1202 (flag_context_list_ty *) xmalloc (sizeof (flag_context_list_ty));
1203 list->argnum = argnum;
1204 memset (&list->flags, '\0', sizeof (list->flags));
1205 switch (index)
1207 case 0:
1208 list->flags.is_format1 = value;
1209 list->flags.pass_format1 = pass;
1210 break;
1211 case 1:
1212 list->flags.is_format2 = value;
1213 list->flags.pass_format2 = pass;
1214 break;
1215 default:
1216 abort ();
1218 list->next = *lastp;
1219 *lastp = list;
1221 else
1223 /* Add a new list entry for this argument number, at the beginning
1224 of the list. Since we don't have an API for replacing the
1225 value of a key in the hash table, we have to copy the first
1226 list element. */
1227 flag_context_list_ty *copy =
1228 (flag_context_list_ty *) xmalloc (sizeof (flag_context_list_ty));
1229 *copy = *list;
1231 list->argnum = argnum;
1232 memset (&list->flags, '\0', sizeof (list->flags));
1233 switch (index)
1235 case 0:
1236 list->flags.is_format1 = value;
1237 list->flags.pass_format1 = pass;
1238 break;
1239 case 1:
1240 list->flags.is_format2 = value;
1241 list->flags.pass_format2 = pass;
1242 break;
1243 default:
1244 abort ();
1246 list->next = copy;
1251 if (allocated_name != NULL)
1252 freesa (allocated_name);
1256 void
1257 xgettext_record_flag (const char *optionstring)
1259 /* Check the string has at least two colons. (Colons in the name are
1260 allowed, needed for the Lisp and the Tcl backends.) */
1261 const char *colon1;
1262 const char *colon2;
1264 for (colon2 = optionstring + strlen (optionstring); ; )
1266 if (colon2 == optionstring)
1267 goto err;
1268 colon2--;
1269 if (*colon2 == ':')
1270 break;
1272 for (colon1 = colon2; ; )
1274 if (colon1 == optionstring)
1275 goto err;
1276 colon1--;
1277 if (*colon1 == ':')
1278 break;
1281 const char *name_start = optionstring;
1282 const char *name_end = colon1;
1283 const char *argnum_start = colon1 + 1;
1284 const char *argnum_end = colon2;
1285 const char *flag = colon2 + 1;
1286 int argnum;
1288 /* Check the parts' syntax. */
1289 if (name_end == name_start)
1290 goto err;
1291 if (argnum_end == argnum_start)
1292 goto err;
1294 char *endp;
1295 argnum = strtol (argnum_start, &endp, 10);
1296 if (endp != argnum_end)
1297 goto err;
1299 if (argnum <= 0)
1300 goto err;
1302 /* Analyze the flag part. */
1304 bool pass;
1306 pass = false;
1307 if (strlen (flag) >= 5 && memcmp (flag, "pass-", 5) == 0)
1309 pass = true;
1310 flag += 5;
1313 /* Unlike po_parse_comment_special(), we don't accept "fuzzy" or "wrap"
1314 here - it has no sense. */
1315 if (strlen (flag) >= 7
1316 && memcmp (flag + strlen (flag) - 7, "-format", 7) == 0)
1318 const char *p;
1319 size_t n;
1320 enum is_format value;
1321 size_t type;
1323 p = flag;
1324 n = strlen (flag) - 7;
1326 if (n >= 3 && memcmp (p, "no-", 3) == 0)
1328 p += 3;
1329 n -= 3;
1330 value = no;
1332 else if (n >= 9 && memcmp (p, "possible-", 9) == 0)
1334 p += 9;
1335 n -= 9;
1336 value = possible;
1338 else if (n >= 11 && memcmp (p, "impossible-", 11) == 0)
1340 p += 11;
1341 n -= 11;
1342 value = impossible;
1344 else
1345 value = yes_according_to_context;
1347 for (type = 0; type < NFORMATS; type++)
1348 if (strlen (format_language[type]) == n
1349 && memcmp (format_language[type], p, n) == 0)
1351 switch (type)
1353 case format_c:
1354 flag_context_list_table_insert (&flag_table_c, 0,
1355 name_start, name_end,
1356 argnum, value, pass);
1357 flag_context_list_table_insert (&flag_table_objc, 0,
1358 name_start, name_end,
1359 argnum, value, pass);
1360 break;
1361 case format_objc:
1362 flag_context_list_table_insert (&flag_table_objc, 1,
1363 name_start, name_end,
1364 argnum, value, pass);
1365 break;
1366 case format_sh:
1367 flag_context_list_table_insert (&flag_table_sh, 0,
1368 name_start, name_end,
1369 argnum, value, pass);
1370 break;
1371 case format_python:
1372 flag_context_list_table_insert (&flag_table_python, 0,
1373 name_start, name_end,
1374 argnum, value, pass);
1375 break;
1376 case format_lisp:
1377 flag_context_list_table_insert (&flag_table_lisp, 0,
1378 name_start, name_end,
1379 argnum, value, pass);
1380 break;
1381 case format_elisp:
1382 flag_context_list_table_insert (&flag_table_elisp, 0,
1383 name_start, name_end,
1384 argnum, value, pass);
1385 break;
1386 case format_librep:
1387 flag_context_list_table_insert (&flag_table_librep, 0,
1388 name_start, name_end,
1389 argnum, value, pass);
1390 break;
1391 case format_scheme:
1392 flag_context_list_table_insert (&flag_table_scheme, 0,
1393 name_start, name_end,
1394 argnum, value, pass);
1395 break;
1396 case format_smalltalk:
1397 break;
1398 case format_java:
1399 flag_context_list_table_insert (&flag_table_java, 0,
1400 name_start, name_end,
1401 argnum, value, pass);
1402 break;
1403 case format_csharp:
1404 flag_context_list_table_insert (&flag_table_csharp, 0,
1405 name_start, name_end,
1406 argnum, value, pass);
1407 break;
1408 case format_awk:
1409 flag_context_list_table_insert (&flag_table_awk, 0,
1410 name_start, name_end,
1411 argnum, value, pass);
1412 break;
1413 case format_pascal:
1414 break;
1415 case format_ycp:
1416 flag_context_list_table_insert (&flag_table_ycp, 0,
1417 name_start, name_end,
1418 argnum, value, pass);
1419 break;
1420 case format_tcl:
1421 flag_context_list_table_insert (&flag_table_tcl, 0,
1422 name_start, name_end,
1423 argnum, value, pass);
1424 break;
1425 case format_perl:
1426 flag_context_list_table_insert (&flag_table_perl, 0,
1427 name_start, name_end,
1428 argnum, value, pass);
1429 break;
1430 case format_perl_brace:
1431 flag_context_list_table_insert (&flag_table_perl, 1,
1432 name_start, name_end,
1433 argnum, value, pass);
1434 break;
1435 case format_php:
1436 flag_context_list_table_insert (&flag_table_php, 0,
1437 name_start, name_end,
1438 argnum, value, pass);
1439 break;
1440 case format_gcc_internal:
1441 flag_context_list_table_insert (&flag_table_gcc_internal, 0,
1442 name_start, name_end,
1443 argnum, value, pass);
1444 break;
1445 case format_qt:
1446 flag_context_list_table_insert (&flag_table_c, 0,
1447 name_start, name_end,
1448 argnum, value, pass);
1449 break;
1450 default:
1451 abort ();
1453 return;
1455 /* If the flag is not among the valid values, the optionstring is
1456 invalid. */
1461 err:
1462 error (EXIT_FAILURE, 0, _("\
1463 A --flag argument doesn't have the <keyword>:<argnum>:[pass-]<flag> syntax: %s"),
1464 optionstring);
1468 static string_list_ty *comment;
1470 void
1471 xgettext_comment_add (const char *str)
1473 if (comment == NULL)
1474 comment = string_list_alloc ();
1475 string_list_append (comment, str);
1478 const char *
1479 xgettext_comment (size_t n)
1481 if (comment == NULL || n >= comment->nitems)
1482 return NULL;
1483 return comment->item[n];
1486 void
1487 xgettext_comment_reset ()
1489 if (comment != NULL)
1491 string_list_free (comment);
1492 comment = NULL;
1497 refcounted_string_list_ty *savable_comment;
1499 void
1500 savable_comment_add (const char *str)
1502 if (savable_comment == NULL)
1504 savable_comment =
1505 (refcounted_string_list_ty *) xmalloc (sizeof (*savable_comment));
1506 savable_comment->refcount = 1;
1507 string_list_init (&savable_comment->contents);
1509 else if (savable_comment->refcount > 1)
1511 /* Unshare the list by making copies. */
1512 struct string_list_ty *oldcontents;
1513 size_t i;
1515 savable_comment->refcount--;
1516 oldcontents = &savable_comment->contents;
1518 savable_comment =
1519 (refcounted_string_list_ty *) xmalloc (sizeof (*savable_comment));
1520 savable_comment->refcount = 1;
1521 string_list_init (&savable_comment->contents);
1522 for (i = 0; i < oldcontents->nitems; i++)
1523 string_list_append (&savable_comment->contents, oldcontents->item[i]);
1525 string_list_append (&savable_comment->contents, str);
1528 void
1529 savable_comment_reset ()
1531 drop_reference (savable_comment);
1532 savable_comment = NULL;
1535 void
1536 savable_comment_to_xgettext_comment (refcounted_string_list_ty *rslp)
1538 xgettext_comment_reset ();
1539 if (rslp != NULL)
1541 size_t i;
1543 for (i = 0; i < rslp->contents.nitems; i++)
1544 xgettext_comment_add (rslp->contents.item[i]);
/* Open the input file FN for reading.
   "-" denotes standard input.  An absolute file name is opened as is.  Any
   other name is looked up in each directory of the directory list in turn;
   the first directory in which the file can be opened wins.
   Stores freshly allocated strings in *LOGICAL_FILE_NAME_P (the name to be
   reported) and *REAL_FILE_NAME_P (the name actually opened) and returns
   the stream.  A failure to open the file terminates the program.  */
static FILE *
xgettext_open (const char *fn,
               char **logical_file_name_p, char **real_file_name_p)
{
  FILE *stream;
  char *real_name;
  char *logical_name;

  if (strcmp (fn, "-") == 0)
    {
      real_name = xstrdup (_("standard input"));
      logical_name = xstrdup (real_name);
      stream = stdin;
    }
  else if (IS_ABSOLUTE_PATH (fn))
    {
      real_name = xstrdup (fn);
      stream = fopen (fn, "r");
      if (stream == NULL)
        error (EXIT_FAILURE, errno, _("\
error while opening \"%s\" for reading"), fn);
      logical_name = xstrdup (real_name);
    }
  else
    {
      int j = 0;

      for (;;)
        {
          const char *dir = dir_list_nth (j);

          /* Ran out of directories without finding the file.  */
          if (dir == NULL)
            error (EXIT_FAILURE, ENOENT, _("\
error while opening \"%s\" for reading"), fn);

          real_name = concatenated_pathname (dir, fn, NULL);

          stream = fopen (real_name, "r");
          if (stream != NULL)
            break;

          /* Only "no such file" lets the search continue with the next
             directory; every other error is fatal.  */
          if (errno != ENOENT)
            error (EXIT_FAILURE, errno, _("\
error while opening \"%s\" for reading"), real_name);
          free (real_name);
          j++;
        }

      /* The real name is the file that was found along the directory
         search path; the logical name, which is what xgettext reports,
         is the file name that was searched for.  */
      logical_name = xstrdup (fn);
    }

  *logical_file_name_p = logical_name;
  *real_file_name_p = real_name;
  return stream;
}
/* Language dependent format string parsers of the file currently being
   processed; set by extract_from_file from the chosen extractor.
   The first one corresponds to the first format slot of a flag context
   (is_format1), the second one to the second slot (is_format2).
   NULL if the language has no notion of format strings.  */
static struct formatstring_parser *current_formatstring_parser1;
static struct formatstring_parser *current_formatstring_parser2;
1617 static void
1618 extract_from_file (const char *file_name, extractor_ty extractor,
1619 msgdomain_list_ty *mdlp)
1621 char *logical_file_name;
1622 char *real_file_name;
1623 FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name);
1625 /* Set the default for the source file encoding. May be overridden by
1626 the extractor function. */
1627 xgettext_current_source_encoding = xgettext_global_source_encoding;
1628 #if HAVE_ICONV
1629 xgettext_current_source_iconv = xgettext_global_source_iconv;
1630 #endif
1632 current_formatstring_parser1 = extractor.formatstring_parser1;
1633 current_formatstring_parser2 = extractor.formatstring_parser2;
1634 extractor.func (fp, real_file_name, logical_file_name, extractor.flag_table,
1635 mdlp);
1637 if (fp != stdin)
1638 fclose (fp);
1639 free (logical_file_name);
1640 free (real_file_name);
#if !HAVE_ICONV
/* Without iconv(), xgettext_global_source_encoding - and therefore also
   xgettext_current_source_encoding - can only be ASCII or UTF-8, and
   neither of them needs a conversion.  convert_string() must therefore
   never be reached; this replacement aborts if it is.  */
#define convert_string(cd,string) (abort (), (string))
#endif
1653 /* Convert the given string from xgettext_current_source_encoding to
1654 the output file encoding (i.e. ASCII or UTF-8).
1655 The resulting string is either the argument string, or freshly allocated.
1656 The file_name and line_number are only used for error message purposes. */
1657 char *
1658 from_current_source_encoding (const char *string,
1659 const char *file_name, size_t line_number)
1661 if (xgettext_current_source_encoding == po_charset_ascii)
1663 if (!is_ascii_string (string))
1665 char buffer[21];
1667 if (line_number == (size_t)(-1))
1668 buffer[0] = '\0';
1669 else
1670 sprintf (buffer, ":%ld", (long) line_number);
1671 multiline_error (xstrdup (""),
1672 xasprintf (_("\
1673 Non-ASCII string at %s%s.\n\
1674 Please specify the source encoding through --from-code.\n"),
1675 file_name, buffer));
1676 exit (EXIT_FAILURE);
1679 else if (xgettext_current_source_encoding != po_charset_utf8)
1680 string = convert_string (xgettext_current_source_iconv, string);
1682 return (char *) string;
/* Replace the variable STRING by its conversion to the output encoding.
   Requires a variable 'pos' (pointer to the current lexer position) in
   scope at the point of use.  The expansion already ends in a semicolon.  */
#define CONVERT_STRING(string) \
  string = from_current_source_encoding (string, pos->file_name, \
                                         pos->line_number);
1690 /* Update the is_format[] flags depending on the information given in the
1691 context. */
1692 static void
1693 set_format_flags_from_context (enum is_format is_format[NFORMATS],
1694 flag_context_ty context, const char *string,
1695 lex_pos_ty *pos, const char *pretty_msgstr)
1697 size_t i;
1699 if (context.is_format1 != undecided || context.is_format2 != undecided)
1700 for (i = 0; i < NFORMATS; i++)
1702 if (is_format[i] == undecided)
1704 if (formatstring_parsers[i] == current_formatstring_parser1
1705 && context.is_format1 != undecided)
1706 is_format[i] = (enum is_format) context.is_format1;
1707 if (formatstring_parsers[i] == current_formatstring_parser2
1708 && context.is_format2 != undecided)
1709 is_format[i] = (enum is_format) context.is_format2;
1711 if (possible_format_p (is_format[i]))
1713 struct formatstring_parser *parser = formatstring_parsers[i];
1714 char *invalid_reason = NULL;
1715 void *descr = parser->parse (string, false, &invalid_reason);
1717 if (descr != NULL)
1718 parser->free (descr);
1719 else
1721 /* The string is not a valid format string. */
1722 if (is_format[i] != possible)
1724 char buffer[21];
1726 error_with_progname = false;
1727 if (pos->line_number == (size_t)(-1))
1728 buffer[0] = '\0';
1729 else
1730 sprintf (buffer, ":%ld", (long) pos->line_number);
1731 multiline_warning (xasprintf (_("%s%s: warning: "),
1732 pos->file_name, buffer),
1733 xasprintf (is_format[i] == yes_according_to_context ? _("Although being used in a format string position, the %s is not a valid %s format string. Reason: %s\n") : _("Although declared as such, the %s is not a valid %s format string. Reason: %s\n"),
1734 pretty_msgstr,
1735 format_language_pretty[i],
1736 invalid_reason));
1737 error_with_progname = true;
1740 is_format[i] = impossible;
1741 free (invalid_reason);
1748 message_ty *
1749 remember_a_message (message_list_ty *mlp, char *string,
1750 flag_context_ty context, lex_pos_ty *pos)
1752 enum is_format is_format[NFORMATS];
1753 enum is_wrap do_wrap;
1754 char *msgid;
1755 message_ty *mp;
1756 char *msgstr;
1757 size_t i;
1759 msgid = string;
1761 /* See whether we shall exclude this message. */
1762 if (exclude != NULL && message_list_search (exclude, msgid) != NULL)
1764 /* Tell the lexer to reset its comment buffer, so that the next
1765 message gets the correct comments. */
1766 xgettext_comment_reset ();
1768 return NULL;
1771 for (i = 0; i < NFORMATS; i++)
1772 is_format[i] = undecided;
1773 do_wrap = undecided;
1775 CONVERT_STRING (msgid);
1777 if (msgid[0] == '\0' && !xgettext_omit_header)
1779 char buffer[21];
1781 error_with_progname = false;
1782 if (pos->line_number == (size_t)(-1))
1783 buffer[0] = '\0';
1784 else
1785 sprintf (buffer, ":%ld", (long) pos->line_number);
1786 multiline_warning (xasprintf (_("%s%s: warning: "), pos->file_name,
1787 buffer),
1788 xstrdup (_("\
1789 Empty msgid. It is reserved by GNU gettext:\n\
1790 gettext(\"\") returns the header entry with\n\
1791 meta information, not the empty string.\n")));
1792 error_with_progname = true;
1795 /* See if we have seen this message before. */
1796 mp = message_list_search (mlp, msgid);
1797 if (mp != NULL)
1799 free (msgid);
1800 for (i = 0; i < NFORMATS; i++)
1801 is_format[i] = mp->is_format[i];
1802 do_wrap = mp->do_wrap;
1804 else
1806 static lex_pos_ty dummypos = { __FILE__, __LINE__ };
1808 /* Construct the msgstr from the prefix and suffix, otherwise use the
1809 empty string. */
1810 if (msgstr_prefix)
1812 msgstr = (char *) xmalloc (strlen (msgstr_prefix)
1813 + strlen (msgid)
1814 + strlen (msgstr_suffix) + 1);
1815 stpcpy (stpcpy (stpcpy (msgstr, msgstr_prefix), msgid),
1816 msgstr_suffix);
1818 else
1819 msgstr = "";
1821 /* Allocate a new message and append the message to the list. */
1822 mp = message_alloc (msgid, NULL, msgstr, strlen (msgstr) + 1, &dummypos);
1823 /* Do not free msgid. */
1824 message_list_append (mlp, mp);
1827 /* Determine whether the context specifies that the msgid is a format
1828 string. */
1829 set_format_flags_from_context (is_format, context, mp->msgid, pos, "msgid");
1831 /* Ask the lexer for the comments it has seen. */
1833 size_t nitems_before;
1834 size_t nitems_after;
1835 int j;
1836 bool add_all_remaining_comments;
1838 nitems_before = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
1840 add_all_remaining_comments = add_all_comments;
1841 for (j = 0; ; ++j)
1843 const char *s = xgettext_comment (j);
1844 const char *t;
1845 if (s == NULL)
1846 break;
1848 CONVERT_STRING (s);
1850 /* To reduce the possibility of unwanted matches we do a two
1851 step match: the line must contain `xgettext:' and one of
1852 the possible format description strings. */
1853 if ((t = strstr (s, "xgettext:")) != NULL)
1855 bool tmp_fuzzy;
1856 enum is_format tmp_format[NFORMATS];
1857 enum is_wrap tmp_wrap;
1858 bool interesting;
1860 t += strlen ("xgettext:");
1862 po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_wrap);
1864 interesting = false;
1865 for (i = 0; i < NFORMATS; i++)
1866 if (tmp_format[i] != undecided)
1868 is_format[i] = tmp_format[i];
1869 interesting = true;
1871 if (tmp_wrap != undecided)
1873 do_wrap = tmp_wrap;
1874 interesting = true;
1877 /* If the "xgettext:" marker was followed by an interesting
1878 keyword, and we updated our is_format/do_wrap variables,
1879 we don't print the comment as a #. comment. */
1880 if (interesting)
1881 continue;
1883 /* When the comment tag is seen, it drags in not only the line
1884 which it starts, but all remaining comment lines. */
1885 if (add_all_remaining_comments
1886 || (add_all_remaining_comments =
1887 (comment_tag != NULL
1888 && strncmp (s, comment_tag, strlen (comment_tag)) == 0)))
1889 message_comment_dot_append (mp, s);
1892 nitems_after = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
1894 /* Don't add the comments if they are a repetition of the tail of the
1895 already present comments. This avoids unneeded duplication if the
1896 same message appears several times, each time with the same comment. */
1897 if (nitems_before < nitems_after)
1899 size_t added = nitems_after - nitems_before;
1901 if (added <= nitems_before)
1903 bool repeated = true;
1905 for (i = 0; i < added; i++)
1906 if (strcmp (mp->comment_dot->item[nitems_before - added + i],
1907 mp->comment_dot->item[nitems_before + i]) != 0)
1909 repeated = false;
1910 break;
1913 if (repeated)
1915 for (i = 0; i < added; i++)
1916 free ((char *) mp->comment_dot->item[nitems_before + i]);
1917 mp->comment_dot->nitems = nitems_before;
1923 /* If it is not already decided, through programmer comments, whether the
1924 msgid is a format string, examine the msgid. This is a heuristic. */
1925 for (i = 0; i < NFORMATS; i++)
1927 if (is_format[i] == undecided
1928 && (formatstring_parsers[i] == current_formatstring_parser1
1929 || formatstring_parsers[i] == current_formatstring_parser2)
1930 /* But avoid redundancy: objc-format is stronger than c-format. */
1931 && !(i == format_c && possible_format_p (is_format[format_objc]))
1932 && !(i == format_objc && possible_format_p (is_format[format_c])))
1934 struct formatstring_parser *parser = formatstring_parsers[i];
1935 char *invalid_reason = NULL;
1936 void *descr = parser->parse (mp->msgid, false, &invalid_reason);
1938 if (descr != NULL)
1940 /* msgid is a valid format string. We mark only those msgids
1941 as format strings which contain at least one format directive
1942 and thus are format strings with a high probability. We
1943 don't mark strings without directives as format strings,
1944 because that would force the programmer to add
1945 "xgettext: no-c-format" anywhere where a translator wishes
1946 to use a percent sign. So, the msgfmt checking will not be
1947 perfect. Oh well. */
1948 if (parser->get_number_of_directives (descr) > 0)
1949 is_format[i] = possible;
1951 parser->free (descr);
1953 else
1955 /* msgid is not a valid format string. */
1956 is_format[i] = impossible;
1957 free (invalid_reason);
1960 mp->is_format[i] = is_format[i];
1963 mp->do_wrap = do_wrap == no ? no : yes; /* By default we wrap. */
1965 /* Remember where we saw this msgid. */
1966 if (line_comment)
1967 message_comment_filepos (mp, pos->file_name, pos->line_number);
1969 /* Tell the lexer to reset its comment buffer, so that the next
1970 message gets the correct comments. */
1971 xgettext_comment_reset ();
1973 return mp;
1977 void
1978 remember_a_message_plural (message_ty *mp, char *string,
1979 flag_context_ty context, lex_pos_ty *pos)
1981 char *msgid_plural;
1982 char *msgstr1;
1983 size_t msgstr1_len;
1984 char *msgstr;
1985 size_t i;
1987 msgid_plural = string;
1989 CONVERT_STRING (msgid_plural);
1991 /* See if the message is already a plural message. */
1992 if (mp->msgid_plural == NULL)
1994 mp->msgid_plural = msgid_plural;
1996 /* Construct the first plural form from the prefix and suffix,
1997 otherwise use the empty string. The translator will have to
1998 provide additional plural forms. */
1999 if (msgstr_prefix)
2001 msgstr1 = (char *) xmalloc (strlen (msgstr_prefix)
2002 + strlen (msgid_plural)
2003 + strlen (msgstr_suffix) + 1);
2004 stpcpy (stpcpy (stpcpy (msgstr1, msgstr_prefix), msgid_plural),
2005 msgstr_suffix);
2007 else
2008 msgstr1 = "";
2009 msgstr1_len = strlen (msgstr1) + 1;
2010 msgstr = (char *) xmalloc (mp->msgstr_len + msgstr1_len);
2011 memcpy (msgstr, mp->msgstr, mp->msgstr_len);
2012 memcpy (msgstr + mp->msgstr_len, msgstr1, msgstr1_len);
2013 mp->msgstr = msgstr;
2014 mp->msgstr_len = mp->msgstr_len + msgstr1_len;
2016 /* Determine whether the context specifies that the msgid_plural is a
2017 format string. */
2018 set_format_flags_from_context (mp->is_format, context, mp->msgid_plural,
2019 pos, "msgid_plural");
2021 /* If it is not already decided, through programmer comments or
2022 the msgid, whether the msgid is a format string, examine the
2023 msgid_plural. This is a heuristic. */
2024 for (i = 0; i < NFORMATS; i++)
2025 if ((formatstring_parsers[i] == current_formatstring_parser1
2026 || formatstring_parsers[i] == current_formatstring_parser2)
2027 && (mp->is_format[i] == undecided || mp->is_format[i] == possible)
2028 /* But avoid redundancy: objc-format is stronger than c-format. */
2029 && !(i == format_c
2030 && possible_format_p (mp->is_format[format_objc]))
2031 && !(i == format_objc
2032 && possible_format_p (mp->is_format[format_c])))
2034 struct formatstring_parser *parser = formatstring_parsers[i];
2035 char *invalid_reason = NULL;
2036 void *descr =
2037 parser->parse (mp->msgid_plural, false, &invalid_reason);
2039 if (descr != NULL)
2041 /* Same heuristic as in remember_a_message. */
2042 if (parser->get_number_of_directives (descr) > 0)
2043 mp->is_format[i] = possible;
2045 parser->free (descr);
2047 else
2049 /* msgid_plural is not a valid format string. */
2050 mp->is_format[i] = impossible;
2051 free (invalid_reason);
2055 else
2056 free (msgid_plural);
2060 static message_ty *
2061 construct_header ()
2063 time_t now;
2064 char *timestring;
2065 message_ty *mp;
2066 char *msgstr;
2067 static lex_pos_ty pos = { __FILE__, __LINE__ };
2069 if (msgid_bugs_address != NULL && msgid_bugs_address[0] == '\0')
2070 multiline_warning (xasprintf (_("warning: ")),
2071 xstrdup (_("\
2072 The option --msgid-bugs-address was not specified.\n\
2073 If you are using a `Makevars' file, please specify\n\
2074 the MSGID_BUGS_ADDRESS variable there; otherwise please\n\
2075 specify an --msgid-bugs-address command line option.\n\
2076 ")));
2078 time (&now);
2079 timestring = po_strftime (&now);
2081 msgstr = xasprintf ("\
2082 Project-Id-Version: PACKAGE VERSION\n\
2083 Report-Msgid-Bugs-To: %s\n\
2084 POT-Creation-Date: %s\n\
2085 PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n\
2086 Last-Translator: FULL NAME <EMAIL@ADDRESS>\n\
2087 Language-Team: LANGUAGE <LL@li.org>\n\
2088 MIME-Version: 1.0\n\
2089 Content-Type: text/plain; charset=CHARSET\n\
2090 Content-Transfer-Encoding: 8bit\n",
2091 msgid_bugs_address != NULL ? msgid_bugs_address : "",
2092 timestring);
2093 free (timestring);
2095 mp = message_alloc ("", NULL, msgstr, strlen (msgstr) + 1, &pos);
2097 message_comment_append (mp,
2098 copyright_holder[0] != '\0'
2099 ? xasprintf ("\
2100 SOME DESCRIPTIVE TITLE.\n\
2101 Copyright (C) YEAR %s\n\
2102 This file is distributed under the same license as the PACKAGE package.\n\
2103 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n",
2104 copyright_holder)
2105 : "\
2106 SOME DESCRIPTIVE TITLE.\n\
2107 This file is put in the public domain.\n\
2108 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n");
2110 mp->is_fuzzy = true;
2112 return mp;
2115 static void
2116 finalize_header (msgdomain_list_ty *mdlp)
2118 /* If the generated PO file has plural forms, add a Plural-Forms template
2119 to the constructed header. */
2121 bool has_plural;
2122 size_t i, j;
2124 has_plural = false;
2125 for (i = 0; i < mdlp->nitems; i++)
2127 message_list_ty *mlp = mdlp->item[i]->messages;
2129 for (j = 0; j < mlp->nitems; j++)
2131 message_ty *mp = mlp->item[j];
2133 if (mp->msgid_plural != NULL)
2135 has_plural = true;
2136 break;
2139 if (has_plural)
2140 break;
2143 if (has_plural)
2145 message_ty *header = message_list_search (mdlp->item[0]->messages, "");
2146 if (header != NULL
2147 && strstr (header->msgstr, "Plural-Forms:") == NULL)
2149 size_t insertpos = strlen (header->msgstr);
2150 const char *suffix;
2151 size_t suffix_len;
2152 char *new_msgstr;
2154 suffix = "\nPlural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n";
2155 if (insertpos == 0 || header->msgstr[insertpos-1] == '\n')
2156 suffix++;
2157 suffix_len = strlen (suffix);
2158 new_msgstr = (char *) xmalloc (header->msgstr_len + suffix_len);
2159 memcpy (new_msgstr, header->msgstr, insertpos);
2160 memcpy (new_msgstr + insertpos, suffix, suffix_len);
2161 memcpy (new_msgstr + insertpos + suffix_len,
2162 header->msgstr + insertpos,
2163 header->msgstr_len - insertpos);
2164 header->msgstr = new_msgstr;
2165 header->msgstr_len = header->msgstr_len + suffix_len;
2170 /* If not all the strings were plain ASCII, or if the output syntax
2171 requires a charset conversion, set the charset in the header to UTF-8.
2172 All messages have already been converted to UTF-8 in remember_a_message
2173 and remember_a_message_plural. */
2175 bool has_nonascii = false;
2176 size_t i;
2178 for (i = 0; i < mdlp->nitems; i++)
2180 message_list_ty *mlp = mdlp->item[i]->messages;
2182 if (!is_ascii_message_list (mlp))
2183 has_nonascii = true;
2186 if (has_nonascii
2187 || output_syntax == syntax_properties
2188 || output_syntax == syntax_stringtable)
2190 message_list_ty *mlp = mdlp->item[0]->messages;
2192 iconv_message_list (mlp, po_charset_utf8, po_charset_utf8, NULL);
/* Number of elements of the array A.  A must be a real array, not a
   pointer.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
/* Pointer just past the last element of the array A.  */
#define ENDOF(a) ((a) + SIZEOF (a))
2202 static extractor_ty
2203 language_to_extractor (const char *name)
2205 struct table_ty
2207 const char *name;
2208 extractor_func func;
2209 flag_context_list_table_ty *flag_table;
2210 struct formatstring_parser *formatstring_parser1;
2211 struct formatstring_parser *formatstring_parser2;
2213 typedef struct table_ty table_ty;
2215 static table_ty table[] =
2217 SCANNERS_C
2218 SCANNERS_PO
2219 SCANNERS_SH
2220 SCANNERS_PYTHON
2221 SCANNERS_LISP
2222 SCANNERS_ELISP
2223 SCANNERS_LIBREP
2224 SCANNERS_SCHEME
2225 SCANNERS_SMALLTALK
2226 SCANNERS_JAVA
2227 SCANNERS_PROPERTIES
2228 SCANNERS_CSHARP
2229 SCANNERS_AWK
2230 SCANNERS_YCP
2231 SCANNERS_TCL
2232 SCANNERS_PERL
2233 SCANNERS_PHP
2234 SCANNERS_STRINGTABLE
2235 SCANNERS_RST
2236 SCANNERS_GLADE
2237 /* Here may follow more languages and their scanners: pike, etc...
2238 Make sure new scanners honor the --exclude-file option. */
2241 table_ty *tp;
2243 for (tp = table; tp < ENDOF(table); ++tp)
2244 if (c_strcasecmp (name, tp->name) == 0)
2246 extractor_ty result;
2248 result.func = tp->func;
2249 result.flag_table = tp->flag_table;
2250 result.formatstring_parser1 = tp->formatstring_parser1;
2251 result.formatstring_parser2 = tp->formatstring_parser2;
2253 /* Handle --qt. It's preferrable to handle this facility here rather
2254 than through an option --language=C++/Qt because the latter would
2255 conflict with the language "C++" regarding the file extensions. */
2256 if (recognize_format_qt && strcmp (tp->name, "C++") == 0)
2257 result.formatstring_parser2 = &formatstring_qt;
2259 return result;
2262 error (EXIT_FAILURE, 0, _("language `%s' unknown"), name);
2263 /* NOTREACHED */
2265 extractor_ty result = { NULL, NULL, NULL, NULL };
2266 return result;
2271 static const char *
2272 extension_to_language (const char *extension)
2274 struct table_ty
2276 const char *extension;
2277 const char *language;
2279 typedef struct table_ty table_ty;
2281 static table_ty table[] =
2283 EXTENSIONS_C
2284 EXTENSIONS_PO
2285 EXTENSIONS_SH
2286 EXTENSIONS_PYTHON
2287 EXTENSIONS_LISP
2288 EXTENSIONS_ELISP
2289 EXTENSIONS_LIBREP
2290 EXTENSIONS_SCHEME
2291 EXTENSIONS_SMALLTALK
2292 EXTENSIONS_JAVA
2293 EXTENSIONS_PROPERTIES
2294 EXTENSIONS_CSHARP
2295 EXTENSIONS_AWK
2296 EXTENSIONS_YCP
2297 EXTENSIONS_TCL
2298 EXTENSIONS_PERL
2299 EXTENSIONS_PHP
2300 EXTENSIONS_STRINGTABLE
2301 EXTENSIONS_RST
2302 EXTENSIONS_GLADE
2303 /* Here may follow more file extensions... */
2306 table_ty *tp;
2308 for (tp = table; tp < ENDOF(table); ++tp)
2309 if (strcmp (extension, tp->extension) == 0)
2310 return tp->language;
2311 return NULL;