No empty .Rs/.Re
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / msgl-cat.c
blob902bd6ec9ab9917577313df234407c915868db92
/* Message list concatenation and duplicate handling.
   Copyright (C) 2001-2003 Free Software Foundation, Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23 #include <alloca.h>
25 /* Specification. */
26 #include "msgl-cat.h"
28 #include <stdbool.h>
29 #include <stddef.h>
30 #include <string.h>
32 #include "error.h"
33 #include "xerror.h"
34 #include "message.h"
35 #include "read-po.h"
36 #include "po-charset.h"
37 #include "msgl-ascii.h"
38 #include "msgl-equal.h"
39 #include "msgl-iconv.h"
40 #include "xalloc.h"
41 #include "xallocsa.h"
42 #include "strstr.h"
43 #include "basename.h"
44 #include "exit.h"
45 #include "gettext.h"
47 #define _(str) gettext (str)
/* These variables control which messages are selected.
   A non-header message is kept only when the absolute value of its
   occurrence count is strictly greater than more_than and strictly
   less than less_than (see is_message_selected).  */
int more_than;
int less_than;

/* If true, use the first available translation.
   If false, merge all available translations into one and fuzzy it.  */
bool use_first;

/* If true, merge like msgcomm.
   If false, merge like msgcat and msguniq.  */
bool msgcomm_mode = false;

/* If true, omit the header entry.
   If false, keep the header entry present in the input.  */
bool omit_header = false;
67 static bool
68 is_message_selected (const message_ty *tmp)
70 int used = (tmp->used >= 0 ? tmp->used : - tmp->used);
72 return (tmp->msgid[0] == '\0'
73 ? !omit_header /* keep the header entry */
74 : (used > more_than && used < less_than));
78 static bool
79 is_message_needed (const message_ty *mp)
81 if (!msgcomm_mode
82 && ((mp->msgid[0] != '\0' && mp->is_fuzzy) || mp->msgstr[0] == '\0'))
83 /* Weak translation. Needed if there are only weak translations. */
84 return mp->tmp->used < 0 && is_message_selected (mp->tmp);
85 else
86 /* Good translation. */
87 return is_message_selected (mp->tmp);
91 /* The use_first logic. */
92 static bool
93 is_message_first_needed (const message_ty *mp)
95 if (mp->tmp->obsolete && is_message_needed (mp))
97 mp->tmp->obsolete = false;
98 return true;
100 else
101 return false;
105 msgdomain_list_ty *
106 catenate_msgdomain_list (string_list_ty *file_list, const char *to_code)
108 const char * const *files = file_list->item;
109 size_t nfiles = file_list->nitems;
110 msgdomain_list_ty **mdlps;
111 const char ***canon_charsets;
112 const char ***identifications;
113 msgdomain_list_ty *total_mdlp;
114 const char *canon_to_code;
115 size_t n, j, k;
117 /* Read input files. */
118 mdlps =
119 (msgdomain_list_ty **) xmalloc (nfiles * sizeof (msgdomain_list_ty *));
120 for (n = 0; n < nfiles; n++)
121 mdlps[n] = read_po_file (files[n]);
123 /* Determine the canonical name of each input file's encoding. */
124 canon_charsets = (const char ***) xmalloc (nfiles * sizeof (const char **));
125 for (n = 0; n < nfiles; n++)
127 msgdomain_list_ty *mdlp = mdlps[n];
128 size_t k;
130 canon_charsets[n] =
131 (const char **) xmalloc (mdlp->nitems * sizeof (const char *));
132 for (k = 0; k < mdlp->nitems; k++)
134 message_list_ty *mlp = mdlp->item[k]->messages;
135 const char *canon_from_code = NULL;
137 if (mlp->nitems > 0)
139 for (j = 0; j < mlp->nitems; j++)
140 if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
142 const char *header = mlp->item[j]->msgstr;
144 if (header != NULL)
146 const char *charsetstr = strstr (header, "charset=");
148 if (charsetstr != NULL)
150 size_t len;
151 char *charset;
152 const char *canon_charset;
154 charsetstr += strlen ("charset=");
155 len = strcspn (charsetstr, " \t\n");
156 charset = (char *) xallocsa (len + 1);
157 memcpy (charset, charsetstr, len);
158 charset[len] = '\0';
160 canon_charset = po_charset_canonicalize (charset);
161 if (canon_charset == NULL)
163 /* Don't give an error for POT files, because
164 POT files usually contain only ASCII
165 msgids. */
166 const char *filename = files[n];
167 size_t filenamelen = strlen (filename);
169 if (filenamelen >= 4
170 && memcmp (filename + filenamelen - 4,
171 ".pot", 4) == 0
172 && strcmp (charset, "CHARSET") == 0)
173 canon_charset = po_charset_ascii;
174 else
175 error (EXIT_FAILURE, 0,
176 _("\
177 present charset \"%s\" is not a portable encoding name"),
178 charset);
181 freesa (charset);
183 if (canon_from_code == NULL)
184 canon_from_code = canon_charset;
185 else if (canon_from_code != canon_charset)
186 error (EXIT_FAILURE, 0,
187 _("\
188 two different charsets \"%s\" and \"%s\" in input file"),
189 canon_from_code, canon_charset);
193 if (canon_from_code == NULL)
195 if (is_ascii_message_list (mlp))
196 canon_from_code = po_charset_ascii;
197 else if (mdlp->encoding != NULL)
198 canon_from_code = mdlp->encoding;
199 else
201 if (k == 0)
202 error (EXIT_FAILURE, 0, _("\
203 input file `%s' doesn't contain a header entry with a charset specification"),
204 files[n]);
205 else
206 error (EXIT_FAILURE, 0, _("\
207 domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"),
208 mdlp->item[k]->domain, files[n]);
212 canon_charsets[n][k] = canon_from_code;
216 /* Determine textual identifications of each file/domain combination. */
217 identifications = (const char ***) xmalloc (nfiles * sizeof (const char **));
218 for (n = 0; n < nfiles; n++)
220 const char *filename = basename (files[n]);
221 msgdomain_list_ty *mdlp = mdlps[n];
222 size_t k;
224 identifications[n] =
225 (const char **) xmalloc (mdlp->nitems * sizeof (const char *));
226 for (k = 0; k < mdlp->nitems; k++)
228 const char *domain = mdlp->item[k]->domain;
229 message_list_ty *mlp = mdlp->item[k]->messages;
230 char *project_id = NULL;
232 for (j = 0; j < mlp->nitems; j++)
233 if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
235 const char *header = mlp->item[j]->msgstr;
237 if (header != NULL)
239 const char *cp = strstr (header, "Project-Id-Version:");
241 if (cp != NULL)
243 const char *endp;
245 cp += sizeof ("Project-Id-Version:") - 1;
247 endp = strchr (cp, '\n');
248 if (endp == NULL)
249 endp = cp + strlen (cp);
251 while (cp < endp && *cp == ' ')
252 cp++;
254 if (cp < endp)
256 size_t len = endp - cp;
257 project_id = (char *) xmalloc (len + 1);
258 memcpy (project_id, cp, len);
259 project_id[len] = '\0';
261 break;
266 identifications[n][k] =
267 (project_id != NULL
268 ? (k > 0 ? xasprintf ("%s:%s (%s)", filename, domain, project_id)
269 : xasprintf ("%s (%s)", filename, project_id))
270 : (k > 0 ? xasprintf ("%s:%s", filename, domain)
271 : xasprintf ("%s", filename)));
275 /* Create list of resulting messages, but don't fill it. Only count
276 the number of translations for each message.
277 If for a message, there is at least one non-fuzzy, non-empty translation,
278 use only the non-fuzzy, non-empty translations. Otherwise use the
279 fuzzy or empty translations as well. */
280 total_mdlp = msgdomain_list_alloc (true);
281 for (n = 0; n < nfiles; n++)
283 msgdomain_list_ty *mdlp = mdlps[n];
285 for (k = 0; k < mdlp->nitems; k++)
287 const char *domain = mdlp->item[k]->domain;
288 message_list_ty *mlp = mdlp->item[k]->messages;
289 message_list_ty *total_mlp;
291 total_mlp = msgdomain_list_sublist (total_mdlp, domain, true);
293 for (j = 0; j < mlp->nitems; j++)
295 message_ty *mp = mlp->item[j];
296 message_ty *tmp;
297 size_t i;
299 tmp = message_list_search (total_mlp, mp->msgid);
300 if (tmp == NULL)
302 tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0,
303 &mp->pos);
304 tmp->is_fuzzy = true; /* may be set to false later */
305 for (i = 0; i < NFORMATS; i++)
306 tmp->is_format[i] = undecided; /* may be set to yes/no later */
307 tmp->do_wrap = yes; /* may be set to no later */
308 tmp->obsolete = true; /* may be set to false later */
309 tmp->alternative_count = 0;
310 tmp->alternative = NULL;
311 message_list_append (total_mlp, tmp);
314 if (!msgcomm_mode
315 && ((mp->msgid[0] != '\0' && mp->is_fuzzy)
316 || mp->msgstr[0] == '\0'))
317 /* Weak translation. Counted as negative tmp->used. */
319 if (tmp->used <= 0)
320 tmp->used--;
322 else
323 /* Good translation. Counted as positive tmp->used. */
325 if (tmp->used < 0)
326 tmp->used = 0;
327 tmp->used++;
329 mp->tmp = tmp;
334 /* Remove messages that are not used and need not be converted. */
335 for (n = 0; n < nfiles; n++)
337 msgdomain_list_ty *mdlp = mdlps[n];
339 for (k = 0; k < mdlp->nitems; k++)
341 message_list_ty *mlp = mdlp->item[k]->messages;
343 message_list_remove_if_not (mlp,
344 use_first
345 ? is_message_first_needed
346 : is_message_needed);
348 /* If no messages are remaining, drop the charset. */
349 if (mlp->nitems == 0)
350 canon_charsets[n][k] = NULL;
353 for (k = 0; k < total_mdlp->nitems; k++)
355 message_list_ty *mlp = total_mdlp->item[k]->messages;
357 message_list_remove_if_not (mlp, is_message_selected);
360 /* Determine the common known a-priori encoding, if any. */
361 if (nfiles > 0)
363 bool all_same_encoding = true;
365 for (n = 1; n < nfiles; n++)
366 if (mdlps[n]->encoding != mdlps[0]->encoding)
368 all_same_encoding = false;
369 break;
372 if (all_same_encoding)
373 total_mdlp->encoding = mdlps[0]->encoding;
376 /* Determine the target encoding for the remaining messages. */
377 if (to_code != NULL)
379 /* Canonicalize target encoding. */
380 canon_to_code = po_charset_canonicalize (to_code);
381 if (canon_to_code == NULL)
382 error (EXIT_FAILURE, 0,
383 _("target charset \"%s\" is not a portable encoding name."),
384 to_code);
386 else
388 /* No target encoding was specified. Test whether the messages are
389 all in a single encoding. If so, conversion is not needed. */
390 const char *first = NULL;
391 const char *second = NULL;
392 bool with_ASCII = false;
393 bool with_UTF8 = false;
394 bool all_ASCII_compatible = true;
396 for (n = 0; n < nfiles; n++)
398 msgdomain_list_ty *mdlp = mdlps[n];
400 for (k = 0; k < mdlp->nitems; k++)
401 if (canon_charsets[n][k] != NULL)
403 if (canon_charsets[n][k] == po_charset_ascii)
404 with_ASCII = true;
405 else
407 if (first == NULL)
408 first = canon_charsets[n][k];
409 else if (canon_charsets[n][k] != first && second == NULL)
410 second = canon_charsets[n][k];
412 if (strcmp (canon_charsets[n][k], "UTF-8") == 0)
413 with_UTF8 = true;
415 if (!po_charset_ascii_compatible (canon_charsets[n][k]))
416 all_ASCII_compatible = false;
421 if (with_ASCII && !all_ASCII_compatible)
423 /* assert (first != NULL); */
424 if (second == NULL)
425 second = po_charset_ascii;
428 if (second != NULL)
430 /* A conversion is needed. Warn the user since he hasn't asked
431 for it and might be surprised. */
432 if (with_UTF8)
433 multiline_warning (xasprintf (_("warning: ")),
434 xasprintf (_("\
435 Input files contain messages in different encodings, UTF-8 among others.\n\
436 Converting the output to UTF-8.\n\
437 ")));
438 else
439 multiline_warning (xasprintf (_("warning: ")),
440 xasprintf (_("\
441 Input files contain messages in different encodings, %s and %s among others.\n\
442 Converting the output to UTF-8.\n\
443 To select a different output encoding, use the --to-code option.\n\
444 "), first, second));
445 canon_to_code = po_charset_utf8;
447 else if (first != NULL && with_ASCII && all_ASCII_compatible)
449 /* The conversion is a no-op conversion. Don't warn the user,
450 but still perform the conversion, in order to check that the
451 input was really ASCII. */
452 canon_to_code = first;
454 else
456 /* No conversion needed. */
457 canon_to_code = NULL;
461 /* Now convert the remaining messages to to_code. */
462 if (canon_to_code != NULL)
463 for (n = 0; n < nfiles; n++)
465 msgdomain_list_ty *mdlp = mdlps[n];
467 for (k = 0; k < mdlp->nitems; k++)
468 if (canon_charsets[n][k] != NULL)
469 /* If the user hasn't given a to_code, don't bother doing a noop
470 conversion that would only replace the charset name in the
471 header entry with its canonical equivalent. */
472 if (!(to_code == NULL && canon_charsets[n][k] == canon_to_code))
473 iconv_message_list (mdlp->item[k]->messages, canon_charsets[n][k],
474 canon_to_code, files[n]);
477 /* Fill the resulting messages. */
478 for (n = 0; n < nfiles; n++)
480 msgdomain_list_ty *mdlp = mdlps[n];
482 for (k = 0; k < mdlp->nitems; k++)
484 message_list_ty *mlp = mdlp->item[k]->messages;
486 for (j = 0; j < mlp->nitems; j++)
488 message_ty *mp = mlp->item[j];
489 message_ty *tmp = mp->tmp;
490 size_t i;
492 /* No need to discard unneeded weak translations here;
493 they have already been filtered out above. */
494 if (use_first || tmp->used == 1 || tmp->used == -1)
496 /* Copy mp, as only message, into tmp. */
497 tmp->msgstr = mp->msgstr;
498 tmp->msgstr_len = mp->msgstr_len;
499 tmp->pos = mp->pos;
500 if (mp->comment)
501 for (i = 0; i < mp->comment->nitems; i++)
502 message_comment_append (tmp, mp->comment->item[i]);
503 if (mp->comment_dot)
504 for (i = 0; i < mp->comment_dot->nitems; i++)
505 message_comment_dot_append (tmp,
506 mp->comment_dot->item[i]);
507 for (i = 0; i < mp->filepos_count; i++)
508 message_comment_filepos (tmp, mp->filepos[i].file_name,
509 mp->filepos[i].line_number);
510 tmp->is_fuzzy = mp->is_fuzzy;
511 for (i = 0; i < NFORMATS; i++)
512 tmp->is_format[i] = mp->is_format[i];
513 tmp->do_wrap = mp->do_wrap;
514 tmp->obsolete = mp->obsolete;
516 else if (msgcomm_mode)
518 /* Copy mp, as only message, into tmp. */
519 if (tmp->msgstr == NULL)
521 tmp->msgstr = mp->msgstr;
522 tmp->msgstr_len = mp->msgstr_len;
523 tmp->pos = mp->pos;
524 tmp->is_fuzzy = mp->is_fuzzy;
526 if (mp->comment && tmp->comment == NULL)
527 for (i = 0; i < mp->comment->nitems; i++)
528 message_comment_append (tmp, mp->comment->item[i]);
529 if (mp->comment_dot && tmp->comment_dot == NULL)
530 for (i = 0; i < mp->comment_dot->nitems; i++)
531 message_comment_dot_append (tmp,
532 mp->comment_dot->item[i]);
533 for (i = 0; i < mp->filepos_count; i++)
534 message_comment_filepos (tmp, mp->filepos[i].file_name,
535 mp->filepos[i].line_number);
536 for (i = 0; i < NFORMATS; i++)
537 if (tmp->is_format[i] == undecided)
538 tmp->is_format[i] = mp->is_format[i];
539 if (tmp->do_wrap == undecided)
540 tmp->do_wrap = mp->do_wrap;
541 tmp->obsolete = false;
543 else
545 /* Copy mp, among others, into tmp. */
546 char *id = xasprintf ("#-#-#-#-# %s #-#-#-#-#",
547 identifications[n][k]);
548 size_t nbytes;
550 if (tmp->alternative_count == 0)
551 tmp->pos = mp->pos;
553 i = tmp->alternative_count;
554 nbytes = (i + 1) * sizeof (struct altstr);
555 tmp->alternative = xrealloc (tmp->alternative, nbytes);
556 tmp->alternative[i].msgstr = mp->msgstr;
557 tmp->alternative[i].msgstr_len = mp->msgstr_len;
558 tmp->alternative[i].msgstr_end =
559 tmp->alternative[i].msgstr + tmp->alternative[i].msgstr_len;
560 tmp->alternative[i].comment = mp->comment;
561 tmp->alternative[i].comment_dot = mp->comment_dot;
562 tmp->alternative[i].id = id;
563 tmp->alternative_count = i + 1;
565 for (i = 0; i < mp->filepos_count; i++)
566 message_comment_filepos (tmp, mp->filepos[i].file_name,
567 mp->filepos[i].line_number);
568 if (!mp->is_fuzzy)
569 tmp->is_fuzzy = false;
570 for (i = 0; i < NFORMATS; i++)
571 if (mp->is_format[i] == yes)
572 tmp->is_format[i] = yes;
573 else if (mp->is_format[i] == no
574 && tmp->is_format[i] == undecided)
575 tmp->is_format[i] = no;
576 if (mp->do_wrap == no)
577 tmp->do_wrap = no;
578 if (!mp->obsolete)
579 tmp->obsolete = false;
584 for (k = 0; k < total_mdlp->nitems; k++)
586 message_list_ty *mlp = total_mdlp->item[k]->messages;
588 for (j = 0; j < mlp->nitems; j++)
590 message_ty *tmp = mlp->item[j];
592 if (tmp->alternative_count > 0)
594 /* Test whether all alternative translations are equal. */
595 struct altstr *first = &tmp->alternative[0];
596 size_t i;
598 for (i = 0; i < tmp->alternative_count; i++)
599 if (!(tmp->alternative[i].msgstr_len == first->msgstr_len
600 && memcmp (tmp->alternative[i].msgstr, first->msgstr,
601 first->msgstr_len) == 0))
602 break;
604 if (i == tmp->alternative_count)
606 /* All alternatives are equal. */
607 tmp->msgstr = first->msgstr;
608 tmp->msgstr_len = first->msgstr_len;
610 else
612 /* Concatenate the alternative msgstrs into a single one,
613 separated by markers. */
614 size_t len;
615 const char *p;
616 const char *p_end;
617 char *new_msgstr;
618 char *np;
620 len = 0;
621 for (i = 0; i < tmp->alternative_count; i++)
623 size_t id_len = strlen (tmp->alternative[i].id);
625 len += tmp->alternative[i].msgstr_len;
627 p = tmp->alternative[i].msgstr;
628 p_end = tmp->alternative[i].msgstr_end;
629 for (; p < p_end; p += strlen (p) + 1)
630 len += id_len + 2;
633 new_msgstr = (char *) xmalloc (len);
634 np = new_msgstr;
635 for (;;)
637 /* Test whether there's one more plural form to
638 process. */
639 for (i = 0; i < tmp->alternative_count; i++)
640 if (tmp->alternative[i].msgstr
641 < tmp->alternative[i].msgstr_end)
642 break;
643 if (i == tmp->alternative_count)
644 break;
646 /* Process next plural form. */
647 for (i = 0; i < tmp->alternative_count; i++)
648 if (tmp->alternative[i].msgstr
649 < tmp->alternative[i].msgstr_end)
651 if (np > new_msgstr && np[-1] != '\0'
652 && np[-1] != '\n')
653 *np++ = '\n';
655 len = strlen (tmp->alternative[i].id);
656 memcpy (np, tmp->alternative[i].id, len);
657 np += len;
658 *np++ = '\n';
660 len = strlen (tmp->alternative[i].msgstr);
661 memcpy (np, tmp->alternative[i].msgstr, len);
662 np += len;
663 tmp->alternative[i].msgstr += len + 1;
666 /* Plural forms are separated by NUL bytes. */
667 *np++ = '\0';
669 tmp->msgstr = new_msgstr;
670 tmp->msgstr_len = np - new_msgstr;
672 tmp->is_fuzzy = true;
675 /* Test whether all alternative comments are equal. */
676 for (i = 0; i < tmp->alternative_count; i++)
677 if (tmp->alternative[i].comment == NULL
678 || !string_list_equal (tmp->alternative[i].comment,
679 first->comment))
680 break;
682 if (i == tmp->alternative_count)
683 /* All alternatives are equal. */
684 tmp->comment = first->comment;
685 else
686 /* Concatenate the alternative comments into a single one,
687 separated by markers. */
688 for (i = 0; i < tmp->alternative_count; i++)
690 string_list_ty *slp = tmp->alternative[i].comment;
692 if (slp != NULL)
694 size_t l;
696 message_comment_append (tmp, tmp->alternative[i].id);
697 for (l = 0; l < slp->nitems; l++)
698 message_comment_append (tmp, slp->item[l]);
702 /* Test whether all alternative dot comments are equal. */
703 for (i = 0; i < tmp->alternative_count; i++)
704 if (tmp->alternative[i].comment_dot == NULL
705 || !string_list_equal (tmp->alternative[i].comment_dot,
706 first->comment_dot))
707 break;
709 if (i == tmp->alternative_count)
710 /* All alternatives are equal. */
711 tmp->comment_dot = first->comment_dot;
712 else
713 /* Concatenate the alternative dot comments into a single one,
714 separated by markers. */
715 for (i = 0; i < tmp->alternative_count; i++)
717 string_list_ty *slp = tmp->alternative[i].comment_dot;
719 if (slp != NULL)
721 size_t l;
723 message_comment_dot_append (tmp,
724 tmp->alternative[i].id);
725 for (l = 0; l < slp->nitems; l++)
726 message_comment_dot_append (tmp, slp->item[l]);
733 return total_mdlp;