1 /* Extract some translations of a translation catalog.
2 Copyright (C) 2001-2005 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
36 #elif defined _MSC_VER || defined __MINGW32__
45 #include "error-progname.h"
47 #include "relocatable.h"
53 #include "msgl-charset.h"
60 #define _(str) gettext (str)
63 /* Force output of PO file even if empty. */
66 /* Selected source files. */
67 static string_list_ty
*location_files
;
69 /* Selected domain names. */
70 static string_list_ty
*domain_names
;
72 /* Task for each grep pass. */
78 bool case_insensitive
;
79 void *compiled_patterns
;
81 static struct grep_task grep_task
[3];
84 static const struct option long_options
[] =
86 { "add-location", no_argument
, &line_comment
, 1 },
87 { "comment", no_argument
, NULL
, 'C' },
88 { "directory", required_argument
, NULL
, 'D' },
89 { "domain", required_argument
, NULL
, 'M' },
90 { "escape", no_argument
, NULL
, CHAR_MAX
+ 1 },
91 { "extended-regexp", no_argument
, NULL
, 'E' },
92 { "file", required_argument
, NULL
, 'f' },
93 { "fixed-strings", no_argument
, NULL
, 'F' },
94 { "force-po", no_argument
, &force_po
, 1 },
95 { "help", no_argument
, NULL
, 'h' },
96 { "ignore-case", no_argument
, NULL
, 'i' },
97 { "indent", no_argument
, NULL
, CHAR_MAX
+ 2 },
98 { "location", required_argument
, NULL
, 'N' },
99 { "msgid", no_argument
, NULL
, 'K' },
100 { "msgstr", no_argument
, NULL
, 'T' },
101 { "no-escape", no_argument
, NULL
, CHAR_MAX
+ 3 },
102 { "no-location", no_argument
, &line_comment
, 0 },
103 { "no-wrap", no_argument
, NULL
, CHAR_MAX
+ 6 },
104 { "output-file", required_argument
, NULL
, 'o' },
105 { "properties-input", no_argument
, NULL
, 'P' },
106 { "properties-output", no_argument
, NULL
, 'p' },
107 { "regexp", required_argument
, NULL
, 'e' },
108 { "sort-by-file", no_argument
, NULL
, CHAR_MAX
+ 4 },
109 { "sort-output", no_argument
, NULL
, CHAR_MAX
+ 5 },
110 { "strict", no_argument
, NULL
, 'S' },
111 { "stringtable-input", no_argument
, NULL
, CHAR_MAX
+ 7 },
112 { "stringtable-output", no_argument
, NULL
, CHAR_MAX
+ 8 },
113 { "version", no_argument
, NULL
, 'V' },
114 { "width", required_argument
, NULL
, 'w' },
119 /* Forward declaration of local functions. */
120 static void no_pass (int opt
)
121 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
122 __attribute__ ((noreturn
))
125 static void usage (int status
)
126 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
127 __attribute__ ((noreturn
))
130 static msgdomain_list_ty
*process_msgdomain_list (msgdomain_list_ty
*mdlp
);
134 main (int argc
, char **argv
)
140 const char *input_file
;
142 msgdomain_list_ty
*result
;
143 bool sort_by_filepos
= false;
144 bool sort_by_msgid
= false;
147 /* Set program name for messages. */
148 set_program_name (argv
[0]);
149 error_print_progname
= maybe_print_progname
;
151 #ifdef HAVE_SETLOCALE
152 /* Set locale via LC_ALL. */
153 setlocale (LC_ALL
, "");
156 /* Set the text message domain. */
157 bindtextdomain (PACKAGE
, relocate (LOCALEDIR
));
158 textdomain (PACKAGE
);
160 /* Ensure that write errors on stdout are detected. */
161 atexit (close_stdout
);
163 /* Set default values for variables. */
169 location_files
= string_list_alloc ();
170 domain_names
= string_list_alloc ();
172 for (i
= 0; i
< 3; i
++)
174 struct grep_task
*gt
= &grep_task
[i
];
176 gt
->matcher
= &matcher_grep
;
177 gt
->pattern_count
= 0;
179 gt
->patterns_size
= 0;
180 gt
->case_insensitive
= false;
183 while ((opt
= getopt_long (argc
, argv
, "CD:e:Ef:FhiKM:N:o:pPTVw:",
188 case '\0': /* Long option. */
196 dir_list_append (optarg
);
203 struct grep_task
*gt
= &grep_task
[grep_pass
];
204 /* Append optarg and a newline to gt->patterns. */
205 size_t len
= strlen (optarg
);
207 (char *) xrealloc (gt
->patterns
, gt
->patterns_size
+ len
+ 1);
208 memcpy (gt
->patterns
+ gt
->patterns_size
, optarg
, len
);
209 gt
->patterns_size
+= len
;
210 *(gt
->patterns
+ gt
->patterns_size
) = '\n';
211 gt
->patterns_size
+= 1;
219 grep_task
[grep_pass
].matcher
= &matcher_egrep
;
226 struct grep_task
*gt
= &grep_task
[grep_pass
];
227 /* Append the contents of the specified file to gt->patterns. */
228 FILE *fp
= fopen (optarg
, "r");
231 error (EXIT_FAILURE
, errno
, _("\
232 error while opening \"%s\" for reading"), optarg
);
237 size_t count
= fread (buf
, 1, sizeof buf
, fp
);
242 error (EXIT_FAILURE
, errno
, _("\
243 error while reading \"%s\""), optarg
);
249 (char *) xrealloc (gt
->patterns
, gt
->patterns_size
+ count
);
250 memcpy (gt
->patterns
+ gt
->patterns_size
, buf
, count
);
251 gt
->patterns_size
+= count
;
254 /* Append a final newline if file ended in a non-newline. */
255 if (gt
->patterns_size
> 0
256 && *(gt
->patterns
+ gt
->patterns_size
- 1) != '\n')
259 (char *) xrealloc (gt
->patterns
, gt
->patterns_size
+ 1);
260 *(gt
->patterns
+ gt
->patterns_size
) = '\n';
261 gt
->patterns_size
+= 1;
272 grep_task
[grep_pass
].matcher
= &matcher_fgrep
;
282 grep_task
[grep_pass
].case_insensitive
= true;
290 string_list_append (domain_names
, optarg
);
294 string_list_append (location_files
, optarg
);
298 output_file
= optarg
;
302 message_print_syntax_properties ();
306 input_syntax
= syntax_properties
;
310 message_print_style_uniforum ();
325 value
= strtol (optarg
, &endp
, 10);
327 message_page_width_set (value
);
332 message_print_style_escape (true);
336 message_print_style_indent ();
340 message_print_style_escape (false);
344 sort_by_filepos
= true;
348 sort_by_msgid
= true;
351 case CHAR_MAX
+ 6: /* --no-wrap */
352 message_page_width_ignore ();
355 case CHAR_MAX
+ 7: /* --stringtable-input */
356 input_syntax
= syntax_stringtable
;
359 case CHAR_MAX
+ 8: /* --stringtable-output */
360 message_print_syntax_stringtable ();
364 usage (EXIT_FAILURE
);
368 /* Version information is requested. */
371 printf ("%s (GNU %s) %s\n", basename (program_name
), PACKAGE
, VERSION
);
372 /* xgettext: no-wrap */
373 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
374 This is free software; see the source for copying conditions. There is NO\n\
375 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
378 printf (_("Written by %s.\n"), "Bruno Haible");
382 /* Help is requested. */
384 usage (EXIT_SUCCESS
);
386 /* Test whether we have an .po file name as argument. */
389 else if (optind
+ 1 == argc
)
390 input_file
= argv
[optind
];
393 error (EXIT_SUCCESS
, 0, _("at most one input file allowed"));
394 usage (EXIT_FAILURE
);
397 /* Verify selected options. */
398 if (!line_comment
&& sort_by_filepos
)
399 error (EXIT_FAILURE
, 0, _("%s and %s are mutually exclusive"),
400 "--no-location", "--sort-by-file");
402 if (sort_by_msgid
&& sort_by_filepos
)
403 error (EXIT_FAILURE
, 0, _("%s and %s are mutually exclusive"),
404 "--sort-output", "--sort-by-file");
406 /* Compile the patterns. */
407 for (grep_pass
= 0; grep_pass
< 3; grep_pass
++)
409 struct grep_task
*gt
= &grep_task
[grep_pass
];
411 if (gt
->pattern_count
> 0)
413 if (gt
->patterns_size
> 0)
415 /* Strip trailing newline. */
416 assert (gt
->patterns
[gt
->patterns_size
- 1] == '\n');
419 gt
->compiled_patterns
=
420 gt
->matcher
->compile (gt
->patterns
, gt
->patterns_size
,
421 gt
->case_insensitive
, false, false, '\n');
425 /* Read input file. */
426 result
= read_po_file (input_file
);
428 if (grep_task
[0].pattern_count
> 0
429 || grep_task
[1].pattern_count
> 0
430 || grep_task
[2].pattern_count
> 0)
432 /* Warn if the current locale is not suitable for this PO file. */
433 compare_po_locale_charsets (result
);
436 /* Select the messages. */
437 result
= process_msgdomain_list (result
);
439 /* Sort the results. */
441 msgdomain_list_sort_by_filepos (result
);
442 else if (sort_by_msgid
)
443 msgdomain_list_sort_by_msgid (result
);
445 /* Write the merged message list out. */
446 msgdomain_list_print (result
, output_file
, force_po
, false);
/* Complain that option OPT was given before any of 'K', 'T' or 'C' had
   selected a grep pass, then show the usage hint and exit with failure.
   The message is reported with status EXIT_SUCCESS, presumably so that
   error() returns and usage() performs the exit.  */
static void
no_pass (int opt)
{
  error (EXIT_SUCCESS, 0,
	 _("option '%c' cannot be used before 'K' or 'T' or 'C' has been specified"),
	 opt);
  usage (EXIT_FAILURE);
}
462 /* Display usage information and exit. */
466 if (status
!= EXIT_SUCCESS
)
467 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
472 Usage: %s [OPTION] [INPUTFILE]\n\
475 /* xgettext: no-wrap */
477 Extracts all messages of a translation catalog that match a given pattern\n\
478 or belong to some given source files.\n\
482 Mandatory arguments to long options are mandatory for short options too.\n"));
485 Input file location:\n"));
487 INPUTFILE input PO file\n"));
489 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
491 If no input file is given or if it is -, standard input is read.\n"));
494 Output file location:\n"));
496 -o, --output-file=FILE write output to specified file\n"));
498 The results are written to standard output if no output file is specified\n\
501 /* xgettext: no-wrap */
503 Message selection:\n\
504 [-N SOURCEFILE]... [-M DOMAINNAME]...\n\
505 [-K MSGID-PATTERN] [-T MSGSTR-PATTERN] [-C COMMENT-PATTERN]\n\
506 A message is selected if it comes from one of the specified source files,\n\
507 or if it comes from one of the specified domains,\n\
508 or if -K is given and its key (msgid or msgid_plural) matches MSGID-PATTERN,\n\
509 or if -T is given and its translation (msgstr) matches MSGSTR-PATTERN,\n\
510 or if -C is given and the translator's comment matches COMMENT-PATTERN.\n\
512 When more than one selection criterion is specified, the set of selected\n\
513 messages is the union of the selected messages of each criterion.\n\
515 MSGID-PATTERN or MSGSTR-PATTERN or COMMENT-PATTERN syntax:\n\
516 [-E | -F] [-e PATTERN | -f FILE]...\n\
517 PATTERNs are basic regular expressions by default, or extended regular\n\
518 expressions if -E is given, or fixed strings if -F is given.\n\
520 -N, --location=SOURCEFILE select messages extracted from SOURCEFILE\n\
521 -M, --domain=DOMAINNAME select messages belonging to domain DOMAINNAME\n\
522 -K, --msgid start of patterns for the msgid\n\
523 -T, --msgstr start of patterns for the msgstr\n\
524 -C, --comment start of patterns for the translator's comment\n\
525 -E, --extended-regexp PATTERN is an extended regular expression\n\
526 -F, --fixed-strings PATTERN is a set of newline-separated strings\n\
527 -e, --regexp=PATTERN use PATTERN as a regular expression\n\
528 -f, --file=FILE obtain PATTERN from FILE\n\
529 -i, --ignore-case ignore case distinctions\n\
533 Input file syntax:\n"));
535 -P, --properties-input input file is in Java .properties syntax\n"));
537 --stringtable-input input file is in NeXTstep/GNUstep .strings syntax\n"));
540 Output details:\n"));
542 --no-escape do not use C escapes in output (default)\n"));
544 --escape use C escapes in output, no extended chars\n"));
546 --force-po write PO file even if empty\n"));
548 --indent indented output style\n"));
550 --no-location suppress '#: filename:line' lines\n"));
552 --add-location preserve '#: filename:line' lines (default)\n"));
554 --strict strict Uniforum output style\n"));
556 -p, --properties-output write out a Java .properties file\n"));
558 --stringtable-output write out a NeXTstep/GNUstep .strings file\n"));
560 -w, --width=NUMBER set output page width\n"));
562 --no-wrap do not break long message lines, longer than\n\
563 the output page width, into several lines\n"));
565 --sort-output generate sorted output\n"));
567 --sort-by-file sort output by file location\n"));
570 Informative output:\n"));
572 -h, --help display this help and exit\n"));
574 -V, --version output version information and exit\n"));
576 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
584 /* Return 1 if FILENAME is contained in a list of filename patterns,
587 filename_list_match (const string_list_ty
*slp
, const char *filename
)
591 for (j
= 0; j
< slp
->nitems
; ++j
)
592 if (fnmatch (slp
->item
[j
], filename
, FNM_PATHNAME
) == 0)
/* EINTR handling for close().
   These functions can return -1/EINTR even though we don't have any
   signal handlers set up, namely when we get interrupted via SIGSTOP.  */

/* Close FD, repeating the call for as long as it fails with EINTR.
   Return the result of the last close() call; on failure errno is the one
   set by that call.  */
static inline int
nonintr_close (int fd)
{
  int retval;

  do
    retval = close (fd);
  while (retval < 0 && errno == EINTR);

  return retval;
}
/* From here on, every close() in this file goes through the wrapper.  */
#define close nonintr_close
620 /* Process a string STR of size LEN bytes through grep, and return true
623 is_string_selected (int grep_pass
, const char *str
, size_t len
)
625 const struct grep_task
*gt
= &grep_task
[grep_pass
];
627 if (gt
->pattern_count
> 0)
633 gt
->matcher
->execute (gt
->compiled_patterns
, str
, len
,
635 return (match_offset
!= (size_t) -1);
642 /* Return true if a message matches. */
644 is_message_selected (const message_ty
*mp
)
651 /* Always keep the header entry. */
652 if (mp
->msgid
[0] == '\0')
655 /* Test whether one of mp->filepos[] is selected. */
656 for (i
= 0; i
< mp
->filepos_count
; i
++)
657 if (filename_list_match (location_files
, mp
->filepos
[i
].file_name
))
660 /* Test msgid and msgid_plural using the --msgid arguments. */
661 if (is_string_selected (0, mp
->msgid
, strlen (mp
->msgid
)))
663 if (mp
->msgid_plural
!= NULL
664 && is_string_selected (0, mp
->msgid_plural
, strlen (mp
->msgid_plural
)))
667 /* Test msgstr using the --msgstr arguments. */
669 msgstr_len
= mp
->msgstr_len
;
670 /* Process each NUL delimited substring separately. */
671 for (p
= msgstr
; p
< msgstr
+ msgstr_len
; )
673 size_t length
= strlen (p
);
675 if (is_string_selected (1, p
, length
))
681 /* Test translator comments using the --comment arguments. */
682 if (grep_task
[2].pattern_count
> 0
683 && mp
->comment
!= NULL
&& mp
->comment
->nitems
> 0)
692 for (j
= 0; j
< mp
->comment
->nitems
; j
++)
693 length
+= strlen (mp
->comment
->item
[j
]) + 1;
694 total_comment
= (char *) xallocsa (length
);
697 for (j
= 0; j
< mp
->comment
->nitems
; j
++)
699 size_t l
= strlen (mp
->comment
->item
[j
]);
701 memcpy (q
, mp
->comment
->item
[j
], l
);
705 if (q
!= total_comment
+ length
)
708 selected
= is_string_selected (2, total_comment
, length
);
710 freesa (total_comment
);
721 process_message_list (const char *domain
, message_list_ty
*mlp
)
723 if (string_list_member (domain_names
, domain
))
724 /* Keep all the messages in the list. */
727 /* Keep only the selected messages. */
728 message_list_remove_if_not (mlp
, is_message_selected
);
732 static msgdomain_list_ty
*
733 process_msgdomain_list (msgdomain_list_ty
*mdlp
)
737 for (k
= 0; k
< mdlp
->nitems
; k
++)
738 process_message_list (mdlp
->item
[k
]->domain
, mdlp
->item
[k
]->messages
);