/* Page header retained from the web listing this file was extracted from:
   No empty .Rs/.Re
   [netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / msggrep.c
   blob d1a36c06cf0c1a37bc050a42a4e8b32a7df13f66  */
/* Extract some translations of a translation catalog.
   Copyright (C) 2001-2005 Free Software Foundation, Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23 #include <alloca.h>
25 #include <assert.h>
26 #include <errno.h>
27 #include <getopt.h>
28 #include <limits.h>
29 #include <locale.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
34 #ifdef HAVE_UNISTD_H
35 # include <unistd.h>
36 #elif defined _MSC_VER || defined __MINGW32__
37 # include <io.h>
38 #endif
40 #include <fnmatch.h>
42 #include "closeout.h"
43 #include "dir-list.h"
44 #include "error.h"
45 #include "error-progname.h"
46 #include "progname.h"
47 #include "relocatable.h"
48 #include "basename.h"
49 #include "message.h"
50 #include "read-po.h"
51 #include "write-po.h"
52 #include "str-list.h"
53 #include "msgl-charset.h"
54 #include "xalloc.h"
55 #include "xallocsa.h"
56 #include "exit.h"
57 #include "libgrep.h"
58 #include "gettext.h"
60 #define _(str) gettext (str)
63 /* Force output of PO file even if empty. */
64 static int force_po;
66 /* Selected source files. */
67 static string_list_ty *location_files;
69 /* Selected domain names. */
70 static string_list_ty *domain_names;
72 /* Task for each grep pass. */
73 struct grep_task {
74 matcher_t *matcher;
75 size_t pattern_count;
76 char *patterns;
77 size_t patterns_size;
78 bool case_insensitive;
79 void *compiled_patterns;
81 static struct grep_task grep_task[3];
83 /* Long options. */
84 static const struct option long_options[] =
86 { "add-location", no_argument, &line_comment, 1 },
87 { "comment", no_argument, NULL, 'C' },
88 { "directory", required_argument, NULL, 'D' },
89 { "domain", required_argument, NULL, 'M' },
90 { "escape", no_argument, NULL, CHAR_MAX + 1 },
91 { "extended-regexp", no_argument, NULL, 'E' },
92 { "file", required_argument, NULL, 'f' },
93 { "fixed-strings", no_argument, NULL, 'F' },
94 { "force-po", no_argument, &force_po, 1 },
95 { "help", no_argument, NULL, 'h' },
96 { "ignore-case", no_argument, NULL, 'i' },
97 { "indent", no_argument, NULL, CHAR_MAX + 2 },
98 { "location", required_argument, NULL, 'N' },
99 { "msgid", no_argument, NULL, 'K' },
100 { "msgstr", no_argument, NULL, 'T' },
101 { "no-escape", no_argument, NULL, CHAR_MAX + 3 },
102 { "no-location", no_argument, &line_comment, 0 },
103 { "no-wrap", no_argument, NULL, CHAR_MAX + 6 },
104 { "output-file", required_argument, NULL, 'o' },
105 { "properties-input", no_argument, NULL, 'P' },
106 { "properties-output", no_argument, NULL, 'p' },
107 { "regexp", required_argument, NULL, 'e' },
108 { "sort-by-file", no_argument, NULL, CHAR_MAX + 4 },
109 { "sort-output", no_argument, NULL, CHAR_MAX + 5 },
110 { "strict", no_argument, NULL, 'S' },
111 { "stringtable-input", no_argument, NULL, CHAR_MAX + 7 },
112 { "stringtable-output", no_argument, NULL, CHAR_MAX + 8 },
113 { "version", no_argument, NULL, 'V' },
114 { "width", required_argument, NULL, 'w' },
115 { NULL, 0, NULL, 0 }
119 /* Forward declaration of local functions. */
120 static void no_pass (int opt)
121 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
122 __attribute__ ((noreturn))
123 #endif
125 static void usage (int status)
126 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
127 __attribute__ ((noreturn))
128 #endif
130 static msgdomain_list_ty *process_msgdomain_list (msgdomain_list_ty *mdlp);
134 main (int argc, char **argv)
136 int opt;
137 bool do_help;
138 bool do_version;
139 char *output_file;
140 const char *input_file;
141 int grep_pass;
142 msgdomain_list_ty *result;
143 bool sort_by_filepos = false;
144 bool sort_by_msgid = false;
145 size_t i;
147 /* Set program name for messages. */
148 set_program_name (argv[0]);
149 error_print_progname = maybe_print_progname;
151 #ifdef HAVE_SETLOCALE
152 /* Set locale via LC_ALL. */
153 setlocale (LC_ALL, "");
154 #endif
156 /* Set the text message domain. */
157 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
158 textdomain (PACKAGE);
160 /* Ensure that write errors on stdout are detected. */
161 atexit (close_stdout);
163 /* Set default values for variables. */
164 do_help = false;
165 do_version = false;
166 output_file = NULL;
167 input_file = NULL;
168 grep_pass = -1;
169 location_files = string_list_alloc ();
170 domain_names = string_list_alloc ();
172 for (i = 0; i < 3; i++)
174 struct grep_task *gt = &grep_task[i];
176 gt->matcher = &matcher_grep;
177 gt->pattern_count = 0;
178 gt->patterns = NULL;
179 gt->patterns_size = 0;
180 gt->case_insensitive = false;
183 while ((opt = getopt_long (argc, argv, "CD:e:Ef:FhiKM:N:o:pPTVw:",
184 long_options, NULL))
185 != EOF)
186 switch (opt)
188 case '\0': /* Long option. */
189 break;
191 case 'C':
192 grep_pass = 2;
193 break;
195 case 'D':
196 dir_list_append (optarg);
197 break;
199 case 'e':
200 if (grep_pass < 0)
201 no_pass (opt);
203 struct grep_task *gt = &grep_task[grep_pass];
204 /* Append optarg and a newline to gt->patterns. */
205 size_t len = strlen (optarg);
206 gt->patterns =
207 (char *) xrealloc (gt->patterns, gt->patterns_size + len + 1);
208 memcpy (gt->patterns + gt->patterns_size, optarg, len);
209 gt->patterns_size += len;
210 *(gt->patterns + gt->patterns_size) = '\n';
211 gt->patterns_size += 1;
212 gt->pattern_count++;
214 break;
216 case 'E':
217 if (grep_pass < 0)
218 no_pass (opt);
219 grep_task[grep_pass].matcher = &matcher_egrep;
220 break;
222 case 'f':
223 if (grep_pass < 0)
224 no_pass (opt);
226 struct grep_task *gt = &grep_task[grep_pass];
227 /* Append the contents of the specified file to gt->patterns. */
228 FILE *fp = fopen (optarg, "r");
230 if (fp == NULL)
231 error (EXIT_FAILURE, errno, _("\
232 error while opening \"%s\" for reading"), optarg);
234 while (!feof (fp))
236 char buf[4096];
237 size_t count = fread (buf, 1, sizeof buf, fp);
239 if (count == 0)
241 if (ferror (fp))
242 error (EXIT_FAILURE, errno, _("\
243 error while reading \"%s\""), optarg);
244 /* EOF reached. */
245 break;
248 gt->patterns =
249 (char *) xrealloc (gt->patterns, gt->patterns_size + count);
250 memcpy (gt->patterns + gt->patterns_size, buf, count);
251 gt->patterns_size += count;
254 /* Append a final newline if file ended in a non-newline. */
255 if (gt->patterns_size > 0
256 && *(gt->patterns + gt->patterns_size - 1) != '\n')
258 gt->patterns =
259 (char *) xrealloc (gt->patterns, gt->patterns_size + 1);
260 *(gt->patterns + gt->patterns_size) = '\n';
261 gt->patterns_size += 1;
264 fclose (fp);
265 gt->pattern_count++;
267 break;
269 case 'F':
270 if (grep_pass < 0)
271 no_pass (opt);
272 grep_task[grep_pass].matcher = &matcher_fgrep;
273 break;
275 case 'h':
276 do_help = true;
277 break;
279 case 'i':
280 if (grep_pass < 0)
281 no_pass (opt);
282 grep_task[grep_pass].case_insensitive = true;
283 break;
285 case 'K':
286 grep_pass = 0;
287 break;
289 case 'M':
290 string_list_append (domain_names, optarg);
291 break;
293 case 'N':
294 string_list_append (location_files, optarg);
295 break;
297 case 'o':
298 output_file = optarg;
299 break;
301 case 'p':
302 message_print_syntax_properties ();
303 break;
305 case 'P':
306 input_syntax = syntax_properties;
307 break;
309 case 'S':
310 message_print_style_uniforum ();
311 break;
313 case 'T':
314 grep_pass = 1;
315 break;
317 case 'V':
318 do_version = true;
319 break;
321 case 'w':
323 int value;
324 char *endp;
325 value = strtol (optarg, &endp, 10);
326 if (endp != optarg)
327 message_page_width_set (value);
329 break;
331 case CHAR_MAX + 1:
332 message_print_style_escape (true);
333 break;
335 case CHAR_MAX + 2:
336 message_print_style_indent ();
337 break;
339 case CHAR_MAX + 3:
340 message_print_style_escape (false);
341 break;
343 case CHAR_MAX + 4:
344 sort_by_filepos = true;
345 break;
347 case CHAR_MAX + 5:
348 sort_by_msgid = true;
349 break;
351 case CHAR_MAX + 6: /* --no-wrap */
352 message_page_width_ignore ();
353 break;
355 case CHAR_MAX + 7: /* --stringtable-input */
356 input_syntax = syntax_stringtable;
357 break;
359 case CHAR_MAX + 8: /* --stringtable-output */
360 message_print_syntax_stringtable ();
361 break;
363 default:
364 usage (EXIT_FAILURE);
365 break;
368 /* Version information is requested. */
369 if (do_version)
371 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
372 /* xgettext: no-wrap */
373 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
374 This is free software; see the source for copying conditions. There is NO\n\
375 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
377 "2001-2005");
378 printf (_("Written by %s.\n"), "Bruno Haible");
379 exit (EXIT_SUCCESS);
382 /* Help is requested. */
383 if (do_help)
384 usage (EXIT_SUCCESS);
386 /* Test whether we have an .po file name as argument. */
387 if (optind == argc)
388 input_file = "-";
389 else if (optind + 1 == argc)
390 input_file = argv[optind];
391 else
393 error (EXIT_SUCCESS, 0, _("at most one input file allowed"));
394 usage (EXIT_FAILURE);
397 /* Verify selected options. */
398 if (!line_comment && sort_by_filepos)
399 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
400 "--no-location", "--sort-by-file");
402 if (sort_by_msgid && sort_by_filepos)
403 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
404 "--sort-output", "--sort-by-file");
406 /* Compile the patterns. */
407 for (grep_pass = 0; grep_pass < 3; grep_pass++)
409 struct grep_task *gt = &grep_task[grep_pass];
411 if (gt->pattern_count > 0)
413 if (gt->patterns_size > 0)
415 /* Strip trailing newline. */
416 assert (gt->patterns[gt->patterns_size - 1] == '\n');
417 gt->patterns_size--;
419 gt->compiled_patterns =
420 gt->matcher->compile (gt->patterns, gt->patterns_size,
421 gt->case_insensitive, false, false, '\n');
425 /* Read input file. */
426 result = read_po_file (input_file);
428 if (grep_task[0].pattern_count > 0
429 || grep_task[1].pattern_count > 0
430 || grep_task[2].pattern_count > 0)
432 /* Warn if the current locale is not suitable for this PO file. */
433 compare_po_locale_charsets (result);
436 /* Select the messages. */
437 result = process_msgdomain_list (result);
439 /* Sort the results. */
440 if (sort_by_filepos)
441 msgdomain_list_sort_by_filepos (result);
442 else if (sort_by_msgid)
443 msgdomain_list_sort_by_msgid (result);
445 /* Write the merged message list out. */
446 msgdomain_list_print (result, output_file, force_po, false);
448 exit (EXIT_SUCCESS);
/* Report that option OPT was given before any of -K, -T or -C selected a
   grep pass, then print the usage hint and exit with EXIT_FAILURE.
   The error () call uses status EXIT_SUCCESS so that it only prints the
   message and returns; usage () performs the exit.  */
static void
no_pass (int opt)
{
  error (EXIT_SUCCESS, 0,
         _("option '%c' cannot be used before 'K' or 'T' or 'C' has been specified"),
         opt);
  usage (EXIT_FAILURE);
}
462 /* Display usage information and exit. */
463 static void
464 usage (int status)
466 if (status != EXIT_SUCCESS)
467 fprintf (stderr, _("Try `%s --help' for more information.\n"),
468 program_name);
469 else
471 printf (_("\
472 Usage: %s [OPTION] [INPUTFILE]\n\
473 "), program_name);
474 printf ("\n");
475 /* xgettext: no-wrap */
476 printf (_("\
477 Extracts all messages of a translation catalog that match a given pattern\n\
478 or belong to some given source files.\n\
479 "));
480 printf ("\n");
481 printf (_("\
482 Mandatory arguments to long options are mandatory for short options too.\n"));
483 printf ("\n");
484 printf (_("\
485 Input file location:\n"));
486 printf (_("\
487 INPUTFILE input PO file\n"));
488 printf (_("\
489 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
490 printf (_("\
491 If no input file is given or if it is -, standard input is read.\n"));
492 printf ("\n");
493 printf (_("\
494 Output file location:\n"));
495 printf (_("\
496 -o, --output-file=FILE write output to specified file\n"));
497 printf (_("\
498 The results are written to standard output if no output file is specified\n\
499 or if it is -.\n"));
500 printf ("\n");
501 /* xgettext: no-wrap */
502 printf (_("\
503 Message selection:\n\
504 [-N SOURCEFILE]... [-M DOMAINNAME]...\n\
505 [-K MSGID-PATTERN] [-T MSGSTR-PATTERN] [-C COMMENT-PATTERN]\n\
506 A message is selected if it comes from one of the specified source files,\n\
507 or if it comes from one of the specified domains,\n\
508 or if -K is given and its key (msgid or msgid_plural) matches MSGID-PATTERN,\n\
509 or if -T is given and its translation (msgstr) matches MSGSTR-PATTERN,\n\
510 or if -C is given and the translator's comment matches COMMENT-PATTERN.\n\
512 When more than one selection criterion is specified, the set of selected\n\
513 messages is the union of the selected messages of each criterion.\n\
515 MSGID-PATTERN or MSGSTR-PATTERN or COMMENT-PATTERN syntax:\n\
516 [-E | -F] [-e PATTERN | -f FILE]...\n\
517 PATTERNs are basic regular expressions by default, or extended regular\n\
518 expressions if -E is given, or fixed strings if -F is given.\n\
520 -N, --location=SOURCEFILE select messages extracted from SOURCEFILE\n\
521 -M, --domain=DOMAINNAME select messages belonging to domain DOMAINNAME\n\
522 -K, --msgid start of patterns for the msgid\n\
523 -T, --msgstr start of patterns for the msgstr\n\
524 -C, --comment start of patterns for the translator's comment\n\
525 -E, --extended-regexp PATTERN is an extended regular expression\n\
526 -F, --fixed-strings PATTERN is a set of newline-separated strings\n\
527 -e, --regexp=PATTERN use PATTERN as a regular expression\n\
528 -f, --file=FILE obtain PATTERN from FILE\n\
529 -i, --ignore-case ignore case distinctions\n\
530 "));
531 printf ("\n");
532 printf (_("\
533 Input file syntax:\n"));
534 printf (_("\
535 -P, --properties-input input file is in Java .properties syntax\n"));
536 printf (_("\
537 --stringtable-input input file is in NeXTstep/GNUstep .strings syntax\n"));
538 printf ("\n");
539 printf (_("\
540 Output details:\n"));
541 printf (_("\
542 --no-escape do not use C escapes in output (default)\n"));
543 printf (_("\
544 --escape use C escapes in output, no extended chars\n"));
545 printf (_("\
546 --force-po write PO file even if empty\n"));
547 printf (_("\
548 --indent indented output style\n"));
549 printf (_("\
550 --no-location suppress '#: filename:line' lines\n"));
551 printf (_("\
552 --add-location preserve '#: filename:line' lines (default)\n"));
553 printf (_("\
554 --strict strict Uniforum output style\n"));
555 printf (_("\
556 -p, --properties-output write out a Java .properties file\n"));
557 printf (_("\
558 --stringtable-output write out a NeXTstep/GNUstep .strings file\n"));
559 printf (_("\
560 -w, --width=NUMBER set output page width\n"));
561 printf (_("\
562 --no-wrap do not break long message lines, longer than\n\
563 the output page width, into several lines\n"));
564 printf (_("\
565 --sort-output generate sorted output\n"));
566 printf (_("\
567 --sort-by-file sort output by file location\n"));
568 printf ("\n");
569 printf (_("\
570 Informative output:\n"));
571 printf (_("\
572 -h, --help display this help and exit\n"));
573 printf (_("\
574 -V, --version output version information and exit\n"));
575 printf ("\n");
576 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
577 stdout);
580 exit (status);
584 /* Return 1 if FILENAME is contained in a list of filename patterns,
585 0 otherwise. */
586 static bool
587 filename_list_match (const string_list_ty *slp, const char *filename)
589 size_t j;
591 for (j = 0; j < slp->nitems; ++j)
592 if (fnmatch (slp->item[j], filename, FNM_PATHNAME) == 0)
593 return true;
594 return false;
#ifdef EINTR

/* EINTR handling for close().
   These functions can return -1/EINTR even though we don't have any
   signal handlers set up, namely when we get interrupted via SIGSTOP.  */

/* Close FD, repeating the call for as long as it fails with EINTR.
   Returns the result of the last close () call; on failure errno is the
   one set by that call.  */
static inline int
nonintr_close (int fd)
{
  int retval;

  do
    retval = close (fd);
  while (retval < 0 && errno == EINTR);

  return retval;
}
/* From here on, every close () in this file goes through the wrapper.  */
#define close nonintr_close

#endif
620 /* Process a string STR of size LEN bytes through grep, and return true
621 if it matches. */
622 static bool
623 is_string_selected (int grep_pass, const char *str, size_t len)
625 const struct grep_task *gt = &grep_task[grep_pass];
627 if (gt->pattern_count > 0)
629 size_t match_size;
630 size_t match_offset;
632 match_offset =
633 gt->matcher->execute (gt->compiled_patterns, str, len,
634 &match_size, false);
635 return (match_offset != (size_t) -1);
637 else
638 return 0;
642 /* Return true if a message matches. */
643 static bool
644 is_message_selected (const message_ty *mp)
646 size_t i;
647 const char *msgstr;
648 size_t msgstr_len;
649 const char *p;
651 /* Always keep the header entry. */
652 if (mp->msgid[0] == '\0')
653 return true;
655 /* Test whether one of mp->filepos[] is selected. */
656 for (i = 0; i < mp->filepos_count; i++)
657 if (filename_list_match (location_files, mp->filepos[i].file_name))
658 return true;
660 /* Test msgid and msgid_plural using the --msgid arguments. */
661 if (is_string_selected (0, mp->msgid, strlen (mp->msgid)))
662 return true;
663 if (mp->msgid_plural != NULL
664 && is_string_selected (0, mp->msgid_plural, strlen (mp->msgid_plural)))
665 return true;
667 /* Test msgstr using the --msgstr arguments. */
668 msgstr = mp->msgstr;
669 msgstr_len = mp->msgstr_len;
670 /* Process each NUL delimited substring separately. */
671 for (p = msgstr; p < msgstr + msgstr_len; )
673 size_t length = strlen (p);
675 if (is_string_selected (1, p, length))
676 return true;
678 p += length + 1;
681 /* Test translator comments using the --comment arguments. */
682 if (grep_task[2].pattern_count > 0
683 && mp->comment != NULL && mp->comment->nitems > 0)
685 size_t length;
686 char *total_comment;
687 char *q;
688 size_t j;
689 bool selected;
691 length = 0;
692 for (j = 0; j < mp->comment->nitems; j++)
693 length += strlen (mp->comment->item[j]) + 1;
694 total_comment = (char *) xallocsa (length);
696 q = total_comment;
697 for (j = 0; j < mp->comment->nitems; j++)
699 size_t l = strlen (mp->comment->item[j]);
701 memcpy (q, mp->comment->item[j], l);
702 q += l;
703 *q++ = '\n';
705 if (q != total_comment + length)
706 abort ();
708 selected = is_string_selected (2, total_comment, length);
710 freesa (total_comment);
712 if (selected)
713 return true;
716 return false;
720 static void
721 process_message_list (const char *domain, message_list_ty *mlp)
723 if (string_list_member (domain_names, domain))
724 /* Keep all the messages in the list. */
726 else
727 /* Keep only the selected messages. */
728 message_list_remove_if_not (mlp, is_message_selected);
732 static msgdomain_list_ty *
733 process_msgdomain_list (msgdomain_list_ty *mdlp)
735 size_t k;
737 for (k = 0; k < mdlp->nitems; k++)
738 process_message_list (mdlp->item[k]->domain, mdlp->item[k]->messages);
740 return mdlp;