Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / msgmerge.c
blob500b235a180d7f994626688ada902406aa59839d
/* GNU gettext - internationalization aids
   Copyright (C) 1995-1998, 2000-2005 Free Software Foundation, Inc.
   This file was written by Peter Miller <millerp@canb.auug.org.au>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 #include <alloca.h>
24 #include <getopt.h>
25 #include <limits.h>
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <locale.h>
32 #include "closeout.h"
33 #include "dir-list.h"
34 #include "error.h"
35 #include "error-progname.h"
36 #include "progname.h"
37 #include "relocatable.h"
38 #include "basename.h"
39 #include "message.h"
40 #include "read-po.h"
41 #include "write-po.h"
42 #include "format.h"
43 #include "xalloc.h"
44 #include "obstack.h"
45 #include "strstr.h"
46 #include "exit.h"
47 #include "c-strcase.h"
48 #include "stpcpy.h"
49 #include "stpncpy.h"
50 #include "msgl-iconv.h"
51 #include "msgl-equal.h"
52 #include "plural-count.h"
53 #include "backupfile.h"
54 #include "copy-file.h"
55 #include "gettext.h"
57 #define _(str) gettext (str)
59 #define obstack_chunk_alloc xmalloc
60 #define obstack_chunk_free free
63 /* If true do not print unneeded messages. */
64 static bool quiet;
66 /* Verbosity level. */
67 static int verbosity_level;
69 /* Force output of PO file even if empty. */
70 static int force_po;
72 /* Apply the .pot file to each of the domains in the PO file. */
73 static bool multi_domain_mode = false;
75 /* Determines whether to use fuzzy matching. */
76 static bool use_fuzzy_matching = true;
78 /* List of user-specified compendiums. */
79 static message_list_list_ty *compendiums;
81 /* Update mode. */
82 static bool update_mode = false;
83 static const char *version_control_string;
84 static const char *backup_suffix_string;
86 /* Long options. */
87 static const struct option long_options[] =
89 { "add-location", no_argument, &line_comment, 1 },
90 { "backup", required_argument, NULL, CHAR_MAX + 1 },
91 { "compendium", required_argument, NULL, 'C', },
92 { "directory", required_argument, NULL, 'D' },
93 { "escape", no_argument, NULL, 'E' },
94 { "force-po", no_argument, &force_po, 1 },
95 { "help", no_argument, NULL, 'h' },
96 { "indent", no_argument, NULL, 'i' },
97 { "multi-domain", no_argument, NULL, 'm' },
98 { "no-escape", no_argument, NULL, 'e' },
99 { "no-fuzzy-matching", no_argument, NULL, 'N' },
100 { "no-location", no_argument, &line_comment, 0 },
101 { "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
102 { "output-file", required_argument, NULL, 'o' },
103 { "properties-input", no_argument, NULL, 'P' },
104 { "properties-output", no_argument, NULL, 'p' },
105 { "quiet", no_argument, NULL, 'q' },
106 { "sort-by-file", no_argument, NULL, 'F' },
107 { "sort-output", no_argument, NULL, 's' },
108 { "silent", no_argument, NULL, 'q' },
109 { "strict", no_argument, NULL, CHAR_MAX + 2 },
110 { "stringtable-input", no_argument, NULL, CHAR_MAX + 5 },
111 { "stringtable-output", no_argument, NULL, CHAR_MAX + 6 },
112 { "suffix", required_argument, NULL, CHAR_MAX + 3 },
113 { "update", no_argument, NULL, 'U' },
114 { "verbose", no_argument, NULL, 'v' },
115 { "version", no_argument, NULL, 'V' },
116 { "width", required_argument, NULL, 'w', },
117 { NULL, 0, NULL, 0 }
/* Counters accumulated during a merge and reported in verbose mode.  */
struct statistics
{
  size_t merged;    /* reference messages with an exact definition match */
  size_t fuzzied;   /* reference messages matched only by fuzzy search */
  size_t missing;   /* reference messages with no usable definition */
  size_t obsolete;  /* definitions no longer referenced */
};
130 /* Forward declaration of local functions. */
131 static void usage (int status)
132 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
133 __attribute__ ((noreturn))
134 #endif
136 static void compendium (const char *filename);
137 static msgdomain_list_ty *merge (const char *fn1, const char *fn2,
138 msgdomain_list_ty **defp);
142 main (int argc, char **argv)
144 int opt;
145 bool do_help;
146 bool do_version;
147 char *output_file;
148 msgdomain_list_ty *def;
149 msgdomain_list_ty *result;
150 bool sort_by_filepos = false;
151 bool sort_by_msgid = false;
153 /* Set program name for messages. */
154 set_program_name (argv[0]);
155 error_print_progname = maybe_print_progname;
156 verbosity_level = 0;
157 quiet = false;
158 gram_max_allowed_errors = UINT_MAX;
160 #ifdef HAVE_SETLOCALE
161 /* Set locale via LC_ALL. */
162 setlocale (LC_ALL, "");
163 #endif
165 /* Set the text message domain. */
166 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
167 textdomain (PACKAGE);
169 /* Ensure that write errors on stdout are detected. */
170 atexit (close_stdout);
172 /* Set default values for variables. */
173 do_help = false;
174 do_version = false;
175 output_file = NULL;
177 while ((opt = getopt_long (argc, argv, "C:D:eEFhimNo:pPqsUvVw:",
178 long_options, NULL))
179 != EOF)
180 switch (opt)
182 case '\0': /* Long option. */
183 break;
185 case 'C':
186 compendium (optarg);
187 break;
189 case 'D':
190 dir_list_append (optarg);
191 break;
193 case 'e':
194 message_print_style_escape (false);
195 break;
197 case 'E':
198 message_print_style_escape (true);
199 break;
201 case 'F':
202 sort_by_filepos = true;
203 break;
205 case 'h':
206 do_help = true;
207 break;
209 case 'i':
210 message_print_style_indent ();
211 break;
213 case 'm':
214 multi_domain_mode = true;
215 break;
217 case 'N':
218 use_fuzzy_matching = false;
219 break;
221 case 'o':
222 output_file = optarg;
223 break;
225 case 'p':
226 message_print_syntax_properties ();
227 break;
229 case 'P':
230 input_syntax = syntax_properties;
231 break;
233 case 'q':
234 quiet = true;
235 break;
237 case 's':
238 sort_by_msgid = true;
239 break;
241 case 'U':
242 update_mode = true;
243 break;
245 case 'v':
246 ++verbosity_level;
247 break;
249 case 'V':
250 do_version = true;
251 break;
253 case 'w':
255 int value;
256 char *endp;
257 value = strtol (optarg, &endp, 10);
258 if (endp != optarg)
259 message_page_width_set (value);
261 break;
263 case CHAR_MAX + 1: /* --backup */
264 version_control_string = optarg;
265 break;
267 case CHAR_MAX + 2: /* --strict */
268 message_print_style_uniforum ();
269 break;
271 case CHAR_MAX + 3: /* --suffix */
272 backup_suffix_string = optarg;
273 break;
275 case CHAR_MAX + 4: /* --no-wrap */
276 message_page_width_ignore ();
277 break;
279 case CHAR_MAX + 5: /* --stringtable-input */
280 input_syntax = syntax_stringtable;
281 break;
283 case CHAR_MAX + 6: /* --stringtable-output */
284 message_print_syntax_stringtable ();
285 break;
287 default:
288 usage (EXIT_FAILURE);
289 break;
292 /* Version information is requested. */
293 if (do_version)
295 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
296 /* xgettext: no-wrap */
297 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
298 This is free software; see the source for copying conditions. There is NO\n\
299 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
301 "1995-1998, 2000-2005");
302 printf (_("Written by %s.\n"), "Peter Miller");
303 exit (EXIT_SUCCESS);
306 /* Help is requested. */
307 if (do_help)
308 usage (EXIT_SUCCESS);
310 /* Test whether we have an .po file name as argument. */
311 if (optind >= argc)
313 error (EXIT_SUCCESS, 0, _("no input files given"));
314 usage (EXIT_FAILURE);
316 if (optind + 2 != argc)
318 error (EXIT_SUCCESS, 0, _("exactly 2 input files required"));
319 usage (EXIT_FAILURE);
322 /* Verify selected options. */
323 if (update_mode)
325 if (output_file != NULL)
327 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
328 "--update", "--output-file");
331 else
333 if (version_control_string != NULL)
335 error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
336 "--backup", "--update");
337 usage (EXIT_FAILURE);
339 if (backup_suffix_string != NULL)
341 error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
342 "--suffix", "--update");
343 usage (EXIT_FAILURE);
347 if (!line_comment && sort_by_filepos)
348 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
349 "--no-location", "--sort-by-file");
351 if (sort_by_msgid && sort_by_filepos)
352 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
353 "--sort-output", "--sort-by-file");
355 /* In update mode, --properties-input implies --properties-output. */
356 if (update_mode && input_syntax == syntax_properties)
357 message_print_syntax_properties ();
358 /* In update mode, --stringtable-input implies --stringtable-output. */
359 if (update_mode && input_syntax == syntax_stringtable)
360 message_print_syntax_stringtable ();
362 /* Merge the two files. */
363 result = merge (argv[optind], argv[optind + 1], &def);
365 /* Sort the results. */
366 if (sort_by_filepos)
367 msgdomain_list_sort_by_filepos (result);
368 else if (sort_by_msgid)
369 msgdomain_list_sort_by_msgid (result);
371 if (update_mode)
373 /* Do nothing if the original file and the result are equal. Also do
374 nothing if the original file and the result differ only by the
375 POT-Creation-Date in the header entry; this is needed for projects
376 which don't put the .pot file under CVS. */
377 if (!msgdomain_list_equal (def, result, true))
379 /* Back up def.po. */
380 enum backup_type backup_type;
381 char *backup_file;
383 output_file = argv[optind];
385 if (backup_suffix_string == NULL)
387 backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX");
388 if (backup_suffix_string != NULL
389 && backup_suffix_string[0] == '\0')
390 backup_suffix_string = NULL;
392 if (backup_suffix_string != NULL)
393 simple_backup_suffix = backup_suffix_string;
395 backup_type = xget_version (_("backup type"), version_control_string);
396 if (backup_type != none)
398 backup_file = find_backup_file_name (output_file, backup_type);
399 copy_file_preserving (output_file, backup_file);
402 /* Write the merged message list out. */
403 msgdomain_list_print (result, output_file, true, false);
406 else
408 /* Write the merged message list out. */
409 msgdomain_list_print (result, output_file, force_po, false);
412 exit (EXIT_SUCCESS);
416 /* Display usage information and exit. */
417 static void
418 usage (int status)
420 if (status != EXIT_SUCCESS)
421 fprintf (stderr, _("Try `%s --help' for more information.\n"),
422 program_name);
423 else
425 printf (_("\
426 Usage: %s [OPTION] def.po ref.pot\n\
427 "), program_name);
428 printf ("\n");
429 /* xgettext: no-wrap */
430 printf (_("\
431 Merges two Uniforum style .po files together. The def.po file is an\n\
432 existing PO file with translations which will be taken over to the newly\n\
433 created file as long as they still match; comments will be preserved,\n\
434 but extracted comments and file positions will be discarded. The ref.pot\n\
435 file is the last created PO file with up-to-date source references but\n\
436 old translations, or a PO Template file (generally created by xgettext);\n\
437 any translations or comments in the file will be discarded, however dot\n\
438 comments and file positions will be preserved. Where an exact match\n\
439 cannot be found, fuzzy matching is used to produce better results.\n\
440 "));
441 printf ("\n");
442 printf (_("\
443 Mandatory arguments to long options are mandatory for short options too.\n"));
444 printf ("\n");
445 printf (_("\
446 Input file location:\n"));
447 printf (_("\
448 def.po translations referring to old sources\n"));
449 printf (_("\
450 ref.pot references to new sources\n"));
451 printf (_("\
452 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
453 printf (_("\
454 -C, --compendium=FILE additional library of message translations,\n\
455 may be specified more than once\n"));
456 printf ("\n");
457 printf (_("\
458 Operation mode:\n"));
459 printf (_("\
460 -U, --update update def.po,\n\
461 do nothing if def.po already up to date\n"));
462 printf ("\n");
463 printf (_("\
464 Output file location:\n"));
465 printf (_("\
466 -o, --output-file=FILE write output to specified file\n"));
467 printf (_("\
468 The results are written to standard output if no output file is specified\n\
469 or if it is -.\n"));
470 printf ("\n");
471 printf (_("\
472 Output file location in update mode:\n"));
473 printf (_("\
474 The result is written back to def.po.\n"));
475 printf (_("\
476 --backup=CONTROL make a backup of def.po\n"));
477 printf (_("\
478 --suffix=SUFFIX override the usual backup suffix\n"));
479 printf (_("\
480 The version control method may be selected via the --backup option or through\n\
481 the VERSION_CONTROL environment variable. Here are the values:\n\
482 none, off never make backups (even if --backup is given)\n\
483 numbered, t make numbered backups\n\
484 existing, nil numbered if numbered backups exist, simple otherwise\n\
485 simple, never always make simple backups\n"));
486 printf (_("\
487 The backup suffix is `~', unless set with --suffix or the SIMPLE_BACKUP_SUFFIX\n\
488 environment variable.\n\
489 "));
490 printf ("\n");
491 printf (_("\
492 Operation modifiers:\n"));
493 printf (_("\
494 -m, --multi-domain apply ref.pot to each of the domains in def.po\n"));
495 printf (_("\
496 -N, --no-fuzzy-matching do not use fuzzy matching\n"));
497 printf ("\n");
498 printf (_("\
499 Input file syntax:\n"));
500 printf (_("\
501 -P, --properties-input input files are in Java .properties syntax\n"));
502 printf (_("\
503 --stringtable-input input files are in NeXTstep/GNUstep .strings\n\
504 syntax\n"));
505 printf ("\n");
506 printf (_("\
507 Output details:\n"));
508 printf (_("\
509 -e, --no-escape do not use C escapes in output (default)\n"));
510 printf (_("\
511 -E, --escape use C escapes in output, no extended chars\n"));
512 printf (_("\
513 --force-po write PO file even if empty\n"));
514 printf (_("\
515 -i, --indent indented output style\n"));
516 printf (_("\
517 --no-location suppress '#: filename:line' lines\n"));
518 printf (_("\
519 --add-location preserve '#: filename:line' lines (default)\n"));
520 printf (_("\
521 --strict strict Uniforum output style\n"));
522 printf (_("\
523 -p, --properties-output write out a Java .properties file\n"));
524 printf (_("\
525 --stringtable-output write out a NeXTstep/GNUstep .strings file\n"));
526 printf (_("\
527 -w, --width=NUMBER set output page width\n"));
528 printf (_("\
529 --no-wrap do not break long message lines, longer than\n\
530 the output page width, into several lines\n"));
531 printf (_("\
532 -s, --sort-output generate sorted output\n"));
533 printf (_("\
534 -F, --sort-by-file sort output by file location\n"));
535 printf ("\n");
536 printf (_("\
537 Informative output:\n"));
538 printf (_("\
539 -h, --help display this help and exit\n"));
540 printf (_("\
541 -V, --version output version information and exit\n"));
542 printf (_("\
543 -v, --verbose increase verbosity level\n"));
544 printf (_("\
545 -q, --quiet, --silent suppress progress indicators\n"));
546 printf ("\n");
547 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
548 stdout);
551 exit (status);
555 static void
556 compendium (const char *filename)
558 msgdomain_list_ty *mdlp;
559 size_t k;
561 mdlp = read_po_file (filename);
562 if (!compendiums)
563 compendiums = message_list_list_alloc ();
564 for (k = 0; k < mdlp->nitems; k++)
565 message_list_list_append (compendiums, mdlp->item[k]->messages);
569 static bool
570 msgfmt_check_pair_fails (const lex_pos_ty *pos,
571 const char *msgid, const char *msgid_plural,
572 const char *msgstr, size_t msgstr_len,
573 size_t fmt)
575 bool failure;
576 struct formatstring_parser *parser = formatstring_parsers[fmt];
577 char *invalid_reason = NULL;
578 void *msgid_descr =
579 parser->parse (msgid_plural != NULL ? msgid_plural : msgid, false,
580 &invalid_reason);
582 failure = false;
583 if (msgid_descr != NULL)
585 const char *p_end = msgstr + msgstr_len;
586 const char *p;
588 for (p = msgstr; p < p_end; p += strlen (p) + 1)
590 void *msgstr_descr = parser->parse (msgstr, true, &invalid_reason);
592 if (msgstr_descr != NULL)
594 failure = parser->check (msgid_descr, msgstr_descr,
595 msgid_plural == NULL, NULL, NULL);
596 parser->free (msgstr_descr);
598 else
600 failure = true;
601 free (invalid_reason);
604 if (failure)
605 break;
608 parser->free (msgid_descr);
610 else
611 free (invalid_reason);
613 return failure;
617 static message_ty *
618 message_merge (message_ty *def, message_ty *ref)
620 const char *msgstr;
621 size_t msgstr_len;
622 message_ty *result;
623 size_t j, i;
625 /* Take the msgid from the reference. When fuzzy matches are made,
626 the definition will not be unique, but the reference will be -
627 usually because it has only been slightly changed. */
629 /* Take the msgstr from the definition. The msgstr of the reference
630 is usually empty, as it was generated by xgettext. If we currently
631 process the header entry we have to merge the msgstr by using the
632 Report-Msgid-Bugs-To and POT-Creation-Date fields from the reference. */
633 if (ref->msgid[0] == '\0')
635 /* Oh, oh. The header entry and we have something to fill in. */
636 static const struct
638 const char *name;
639 size_t len;
640 } known_fields[] =
642 { "Project-Id-Version:", sizeof ("Project-Id-Version:") - 1 },
643 #define PROJECT_ID 0
644 { "Report-Msgid-Bugs-To:", sizeof ("Report-Msgid-Bugs-To:") - 1 },
645 #define REPORT_MSGID_BUGS_TO 1
646 { "POT-Creation-Date:", sizeof ("POT-Creation-Date:") - 1 },
647 #define POT_CREATION_DATE 2
648 { "PO-Revision-Date:", sizeof ("PO-Revision-Date:") - 1 },
649 #define PO_REVISION_DATE 3
650 { "Last-Translator:", sizeof ("Last-Translator:") - 1 },
651 #define LAST_TRANSLATOR 4
652 { "Language-Team:", sizeof ("Language-Team:") - 1 },
653 #define LANGUAGE_TEAM 5
654 { "MIME-Version:", sizeof ("MIME-Version:") - 1 },
655 #define MIME_VERSION 6
656 { "Content-Type:", sizeof ("Content-Type:") - 1 },
657 #define CONTENT_TYPE 7
658 { "Content-Transfer-Encoding:",
659 sizeof ("Content-Transfer-Encoding:") - 1 }
660 #define CONTENT_TRANSFER 8
662 #define UNKNOWN 9
663 struct
665 const char *string;
666 size_t len;
667 } header_fields[UNKNOWN + 1];
668 struct obstack pool;
669 const char *cp;
670 char *newp;
671 size_t len, cnt;
673 /* Clear all fields. */
674 memset (header_fields, '\0', sizeof (header_fields));
676 /* Prepare a temporary memory pool. */
677 obstack_init (&pool);
679 cp = def->msgstr;
680 while (*cp != '\0')
682 const char *endp = strchr (cp, '\n');
683 int terminated = endp != NULL;
685 if (!terminated)
687 /* Add a trailing newline. */
688 char *copy;
689 endp = strchr (cp, '\0');
691 len = endp - cp + 1;
693 copy = (char *) obstack_alloc (&pool, len + 1);
694 stpcpy (stpcpy (copy, cp), "\n");
695 cp = copy;
697 else
699 len = (endp - cp) + 1;
700 ++endp;
703 /* Compare with any of the known fields. */
704 for (cnt = 0;
705 cnt < sizeof (known_fields) / sizeof (known_fields[0]);
706 ++cnt)
707 if (c_strncasecmp (cp, known_fields[cnt].name, known_fields[cnt].len)
708 == 0)
709 break;
711 if (cnt < sizeof (known_fields) / sizeof (known_fields[0]))
713 header_fields[cnt].string = &cp[known_fields[cnt].len];
714 header_fields[cnt].len = len - known_fields[cnt].len;
716 else
718 /* It's an unknown field. Append content to what is already
719 known. */
720 char *extended =
721 (char *) obstack_alloc (&pool,
722 header_fields[UNKNOWN].len + len + 1);
723 memcpy (extended, header_fields[UNKNOWN].string,
724 header_fields[UNKNOWN].len);
725 memcpy (&extended[header_fields[UNKNOWN].len], cp, len);
726 extended[header_fields[UNKNOWN].len + len] = '\0';
727 header_fields[UNKNOWN].string = extended;
728 header_fields[UNKNOWN].len += len;
731 cp = endp;
735 const char *msgid_bugs_ptr;
737 msgid_bugs_ptr = strstr (ref->msgstr, "Report-Msgid-Bugs-To:");
738 if (msgid_bugs_ptr != NULL)
740 size_t msgid_bugs_len;
741 const char *endp;
743 msgid_bugs_ptr += sizeof ("Report-Msgid-Bugs-To:") - 1;
745 endp = strchr (msgid_bugs_ptr, '\n');
746 if (endp == NULL)
748 /* Add a trailing newline. */
749 char *extended;
750 endp = strchr (msgid_bugs_ptr, '\0');
751 msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
752 extended = (char *) obstack_alloc (&pool, msgid_bugs_len + 1);
753 stpcpy (stpcpy (extended, msgid_bugs_ptr), "\n");
754 msgid_bugs_ptr = extended;
756 else
757 msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
759 header_fields[REPORT_MSGID_BUGS_TO].string = msgid_bugs_ptr;
760 header_fields[REPORT_MSGID_BUGS_TO].len = msgid_bugs_len;
765 const char *pot_date_ptr;
767 pot_date_ptr = strstr (ref->msgstr, "POT-Creation-Date:");
768 if (pot_date_ptr != NULL)
770 size_t pot_date_len;
771 const char *endp;
773 pot_date_ptr += sizeof ("POT-Creation-Date:") - 1;
775 endp = strchr (pot_date_ptr, '\n');
776 if (endp == NULL)
778 /* Add a trailing newline. */
779 char *extended;
780 endp = strchr (pot_date_ptr, '\0');
781 pot_date_len = (endp - pot_date_ptr) + 1;
782 extended = (char *) obstack_alloc (&pool, pot_date_len + 1);
783 stpcpy (stpcpy (extended, pot_date_ptr), "\n");
784 pot_date_ptr = extended;
786 else
787 pot_date_len = (endp - pot_date_ptr) + 1;
789 header_fields[POT_CREATION_DATE].string = pot_date_ptr;
790 header_fields[POT_CREATION_DATE].len = pot_date_len;
794 /* Concatenate all the various fields. */
795 len = 0;
796 for (cnt = 0; cnt < UNKNOWN; ++cnt)
797 if (header_fields[cnt].string != NULL)
798 len += known_fields[cnt].len + header_fields[cnt].len;
799 len += header_fields[UNKNOWN].len;
801 cp = newp = (char *) xmalloc (len + 1);
802 newp[len] = '\0';
804 #define IF_FILLED(idx) \
805 if (header_fields[idx].string) \
806 newp = stpncpy (stpcpy (newp, known_fields[idx].name), \
807 header_fields[idx].string, header_fields[idx].len)
809 IF_FILLED (PROJECT_ID);
810 IF_FILLED (REPORT_MSGID_BUGS_TO);
811 IF_FILLED (POT_CREATION_DATE);
812 IF_FILLED (PO_REVISION_DATE);
813 IF_FILLED (LAST_TRANSLATOR);
814 IF_FILLED (LANGUAGE_TEAM);
815 IF_FILLED (MIME_VERSION);
816 IF_FILLED (CONTENT_TYPE);
817 IF_FILLED (CONTENT_TRANSFER);
818 if (header_fields[UNKNOWN].string != NULL)
819 stpcpy (newp, header_fields[UNKNOWN].string);
821 #undef IF_FILLED
823 /* Free the temporary memory pool. */
824 obstack_free (&pool, NULL);
826 msgstr = cp;
827 msgstr_len = strlen (cp) + 1;
829 else
831 msgstr = def->msgstr;
832 msgstr_len = def->msgstr_len;
835 result = message_alloc (xstrdup (ref->msgid), ref->msgid_plural,
836 msgstr, msgstr_len, &def->pos);
838 /* Take the comments from the definition file. There will be none at
839 all in the reference file, as it was generated by xgettext. */
840 if (def->comment)
841 for (j = 0; j < def->comment->nitems; ++j)
842 message_comment_append (result, def->comment->item[j]);
844 /* Take the dot comments from the reference file, as they are
845 generated by xgettext. Any in the definition file are old ones
846 collected by previous runs of xgettext and msgmerge. */
847 if (ref->comment_dot)
848 for (j = 0; j < ref->comment_dot->nitems; ++j)
849 message_comment_dot_append (result, ref->comment_dot->item[j]);
851 /* The flags are mixed in a special way. Some informations come
852 from the reference message (such as format/no-format), others
853 come from the definition file (fuzzy or not). */
854 result->is_fuzzy = def->is_fuzzy;
856 for (i = 0; i < NFORMATS; i++)
858 result->is_format[i] = ref->is_format[i];
860 /* If the reference message is marked as being a format specifier,
861 but the definition message is not, we check if the resulting
862 message would pass "msgfmt -c". If yes, then all is fine. If
863 not, we add a fuzzy marker, because
864 1. the message needs the translator's attention,
865 2. msgmerge must not transform a PO file which passes "msgfmt -c"
866 into a PO file which doesn't. */
867 if (!result->is_fuzzy
868 && possible_format_p (ref->is_format[i])
869 && !possible_format_p (def->is_format[i])
870 && msgfmt_check_pair_fails (&def->pos, ref->msgid, ref->msgid_plural,
871 msgstr, msgstr_len, i))
872 result->is_fuzzy = true;
875 result->do_wrap = ref->do_wrap;
877 /* Take the file position comments from the reference file, as they
878 are generated by xgettext. Any in the definition file are old ones
879 collected by previous runs of xgettext and msgmerge. */
880 for (j = 0; j < ref->filepos_count; ++j)
882 lex_pos_ty *pp = &ref->filepos[j];
883 message_comment_filepos (result, pp->file_name, pp->line_number);
886 /* Special postprocessing is needed if the reference message is a
887 plural form and the definition message isn't, or vice versa. */
888 if (ref->msgid_plural != NULL)
890 if (def->msgid_plural == NULL)
891 result->used = 1;
893 else
895 if (def->msgid_plural != NULL)
896 result->used = 2;
899 /* All done, return the merged message to the caller. */
900 return result;
904 #define DOT_FREQUENCY 10
906 static void
907 match_domain (const char *fn1, const char *fn2,
908 message_list_list_ty *definitions, message_list_ty *refmlp,
909 message_list_ty *resultmlp,
910 struct statistics *stats, unsigned int *processed)
912 message_ty *header_entry;
913 unsigned long int nplurals;
914 char *untranslated_plural_msgstr;
915 size_t j;
917 header_entry = message_list_search (definitions->item[0], "");
918 nplurals = get_plural_count (header_entry ? header_entry->msgstr : NULL);
919 untranslated_plural_msgstr = (char *) xmalloc (nplurals);
920 memset (untranslated_plural_msgstr, '\0', nplurals);
922 for (j = 0; j < refmlp->nitems; j++, (*processed)++)
924 message_ty *refmsg;
925 message_ty *defmsg;
927 /* Because merging can take a while we print something to signal
928 we are not dead. */
929 if (!quiet && verbosity_level <= 1 && *processed % DOT_FREQUENCY == 0)
930 fputc ('.', stderr);
932 refmsg = refmlp->item[j];
934 /* See if it is in the other file. */
935 defmsg = message_list_list_search (definitions, refmsg->msgid);
936 if (defmsg)
938 /* Merge the reference with the definition: take the #. and
939 #: comments from the reference, take the # comments from
940 the definition, take the msgstr from the definition. Add
941 this merged entry to the output message list. */
942 message_ty *mp = message_merge (defmsg, refmsg);
944 message_list_append (resultmlp, mp);
946 /* Remember that this message has been used, when we scan
947 later to see if anything was omitted. */
948 defmsg->used = 1;
949 stats->merged++;
951 else if (refmsg->msgid[0] != '\0')
953 /* If the message was not defined at all, try to find a very
954 similar message, it could be a typo, or the suggestion may
955 help. */
956 if (use_fuzzy_matching
957 && ((defmsg =
958 message_list_list_search_fuzzy (definitions,
959 refmsg->msgid)) != NULL))
961 message_ty *mp;
963 if (verbosity_level > 1)
965 po_gram_error_at_line (&refmsg->pos, _("\
966 this message is used but not defined..."));
967 po_gram_error_at_line (&defmsg->pos, _("\
968 ...but this definition is similar"));
971 /* Merge the reference with the definition: take the #. and
972 #: comments from the reference, take the # comments from
973 the definition, take the msgstr from the definition. Add
974 this merged entry to the output message list. */
975 mp = message_merge (defmsg, refmsg);
977 mp->is_fuzzy = true;
979 message_list_append (resultmlp, mp);
981 /* Remember that this message has been used, when we scan
982 later to see if anything was omitted. */
983 defmsg->used = 1;
984 stats->fuzzied++;
985 if (!quiet && verbosity_level <= 1)
986 /* Always print a dot if we handled a fuzzy match. */
987 fputc ('.', stderr);
989 else
991 message_ty *mp;
992 bool is_untranslated;
993 const char *p;
994 const char *pend;
996 if (verbosity_level > 1)
997 po_gram_error_at_line (&refmsg->pos, _("\
998 this message is used but not defined in %s"), fn1);
1000 mp = message_copy (refmsg);
1002 if (mp->msgid_plural != NULL)
1004 /* Test if mp is untranslated. (It most likely is.) */
1005 is_untranslated = true;
1006 for (p = mp->msgstr, pend = p + mp->msgstr_len; p < pend; p++)
1007 if (*p != '\0')
1009 is_untranslated = false;
1010 break;
1012 if (is_untranslated)
1014 /* Change mp->msgstr_len consecutive empty strings into
1015 nplurals consecutive empty strings. */
1016 if (nplurals > mp->msgstr_len)
1017 mp->msgstr = untranslated_plural_msgstr;
1018 mp->msgstr_len = nplurals;
1022 message_list_append (resultmlp, mp);
1023 stats->missing++;
1028 /* Now postprocess the problematic merges. This is needed because we
1029 want the result to pass the "msgfmt -c -v" check. */
1031 /* message_merge sets mp->used to 1 or 2, depending on the problem.
1032 Compute the bitwise OR of all these. */
1033 int problematic = 0;
1035 for (j = 0; j < resultmlp->nitems; j++)
1036 problematic |= resultmlp->item[j]->used;
1038 if (problematic)
1040 unsigned long int nplurals = 0;
1042 if (problematic & 1)
1044 /* Need to know nplurals of the result domain. */
1045 message_ty *header_entry = message_list_search (resultmlp, "");
1047 nplurals = get_plural_count (header_entry
1048 ? header_entry->msgstr
1049 : NULL);
1052 for (j = 0; j < resultmlp->nitems; j++)
1054 message_ty *mp = resultmlp->item[j];
1056 if ((mp->used & 1) && (nplurals > 0))
1058 /* ref->msgid_plural != NULL but def->msgid_plural == NULL.
1059 Use a copy of def->msgstr for each possible plural form. */
1060 size_t new_msgstr_len;
1061 char *new_msgstr;
1062 char *p;
1063 unsigned long i;
1065 if (verbosity_level > 1)
1067 po_gram_error_at_line (&mp->pos, _("\
1068 this message should define plural forms"));
1071 new_msgstr_len = nplurals * mp->msgstr_len;
1072 new_msgstr = (char *) xmalloc (new_msgstr_len);
1073 for (i = 0, p = new_msgstr; i < nplurals; i++)
1075 memcpy (p, mp->msgstr, mp->msgstr_len);
1076 p += mp->msgstr_len;
1078 mp->msgstr = new_msgstr;
1079 mp->msgstr_len = new_msgstr_len;
1080 mp->is_fuzzy = true;
1083 if ((mp->used & 2) && (mp->msgstr_len > strlen (mp->msgstr) + 1))
1085 /* ref->msgid_plural == NULL but def->msgid_plural != NULL.
1086 Use only the first among the plural forms. */
1088 if (verbosity_level > 1)
1090 po_gram_error_at_line (&mp->pos, _("\
1091 this message should not define plural forms"));
1094 mp->msgstr_len = strlen (mp->msgstr) + 1;
1095 mp->is_fuzzy = true;
1098 /* Postprocessing of this message is done. */
1099 mp->used = 0;
1105 static msgdomain_list_ty *
1106 merge (const char *fn1, const char *fn2, msgdomain_list_ty **defp)
1108 msgdomain_list_ty *def;
1109 msgdomain_list_ty *ref;
1110 size_t j, k;
1111 unsigned int processed;
1112 struct statistics stats;
1113 msgdomain_list_ty *result;
1114 message_list_list_ty *definitions;
1115 message_list_ty *empty_list;
1117 stats.merged = stats.fuzzied = stats.missing = stats.obsolete = 0;
1119 /* This is the definitions file, created by a human. */
1120 def = read_po_file (fn1);
1122 /* Create the set of places to look for message definitions: a list
1123 whose first element will be definitions for the current domain, and
1124 whose other elements come from the compendiums. */
1125 definitions = message_list_list_alloc ();
1126 message_list_list_append (definitions, NULL);
1127 if (compendiums)
1128 message_list_list_append_list (definitions, compendiums);
1129 empty_list = message_list_alloc (false);
1131 /* This is the references file, created by groping the sources with
1132 the xgettext program. */
1133 ref = read_po_file (fn2);
1134 /* Add a dummy header entry, if the references file contains none. */
1135 for (k = 0; k < ref->nitems; k++)
1136 if (message_list_search (ref->item[k]->messages, "") == NULL)
1138 static lex_pos_ty pos = { __FILE__, __LINE__ };
1139 message_ty *refheader = message_alloc ("", NULL, "", 1, &pos);
1141 message_list_prepend (ref->item[k]->messages, refheader);
1144 /* The references file can be either in ASCII or in UTF-8. If it is
1145 in UTF-8, we have to convert the definitions to UTF-8 as well. */
1147 bool was_utf8 = false;
1148 for (k = 0; k < ref->nitems; k++)
1150 message_list_ty *mlp = ref->item[k]->messages;
1152 for (j = 0; j < mlp->nitems; j++)
1153 if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
1155 const char *header = mlp->item[j]->msgstr;
1157 if (header != NULL)
1159 const char *charsetstr = strstr (header, "charset=");
1161 if (charsetstr != NULL)
1163 size_t len;
1165 charsetstr += strlen ("charset=");
1166 len = strcspn (charsetstr, " \t\n");
1167 if (len == strlen ("UTF-8")
1168 && c_strncasecmp (charsetstr, "UTF-8", len) == 0)
1169 was_utf8 = true;
1174 if (was_utf8)
1175 def = iconv_msgdomain_list (def, "UTF-8", fn1);
1178 result = msgdomain_list_alloc (false);
1179 processed = 0;
1181 /* Every reference must be matched with its definition. */
1182 if (!multi_domain_mode)
1183 for (k = 0; k < ref->nitems; k++)
1185 const char *domain = ref->item[k]->domain;
1186 message_list_ty *refmlp = ref->item[k]->messages;
1187 message_list_ty *resultmlp =
1188 msgdomain_list_sublist (result, domain, true);
1190 definitions->item[0] = msgdomain_list_sublist (def, domain, false);
1191 if (definitions->item[0] == NULL)
1192 definitions->item[0] = empty_list;
1194 match_domain (fn1, fn2, definitions, refmlp, resultmlp,
1195 &stats, &processed);
1197 else
1199 /* Apply the references messages in the default domain to each of
1200 the definition domains. */
1201 message_list_ty *refmlp = ref->item[0]->messages;
1203 for (k = 0; k < def->nitems; k++)
1205 const char *domain = def->item[k]->domain;
1206 message_list_ty *defmlp = def->item[k]->messages;
1208 /* Ignore the default message domain if it has no messages. */
1209 if (k > 0 || defmlp->nitems > 0)
1211 message_list_ty *resultmlp =
1212 msgdomain_list_sublist (result, domain, true);
1214 definitions->item[0] = defmlp;
1216 match_domain (fn1, fn2, definitions, refmlp, resultmlp,
1217 &stats, &processed);
1222 /* Look for messages in the definition file, which are not present
1223 in the reference file, indicating messages which defined but not
1224 used in the program. Don't scan the compendium(s). */
1225 for (k = 0; k < def->nitems; ++k)
1227 const char *domain = def->item[k]->domain;
1228 message_list_ty *defmlp = def->item[k]->messages;
1230 for (j = 0; j < defmlp->nitems; j++)
1232 message_ty *defmsg = defmlp->item[j];
1234 if (!defmsg->used)
1236 /* Remember the old translation although it is not used anymore.
1237 But we mark it as obsolete. */
1238 message_ty *mp;
1240 mp = message_copy (defmsg);
1241 mp->obsolete = true;
1243 message_list_append (msgdomain_list_sublist (result, domain, true),
1244 mp);
1245 stats.obsolete++;
1250 /* Determine the known a-priori encoding, if any. */
1251 if (def->encoding == ref->encoding)
1252 result->encoding = def->encoding;
1254 /* Report some statistics. */
1255 if (verbosity_level > 0)
1256 fprintf (stderr, _("%s\
1257 Read %ld old + %ld reference, \
1258 merged %ld, fuzzied %ld, missing %ld, obsolete %ld.\n"),
1259 !quiet && verbosity_level <= 1 ? "\n" : "",
1260 (long) def->nitems, (long) ref->nitems,
1261 (long) stats.merged, (long) stats.fuzzied, (long) stats.missing,
1262 (long) stats.obsolete);
1263 else if (!quiet)
1264 fputs (_(" done.\n"), stderr);
1266 /* Return results. */
1267 *defp = def;
1268 return result;