No empty .Rs/.Re
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / write-po.c
blob05a136e5d07f20711612a57a9b69097cce4d3c62
1 /* GNU gettext - internationalization aids
2 Copyright (C) 1995-1998, 2000-2005 Free Software Foundation, Inc.
4 This file was written by Peter Miller <millerp@canb.auug.org.au>
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23 #include <alloca.h>
25 /* Specification. */
26 #include "write-po.h"
28 #include <errno.h>
29 #include <limits.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
34 #if HAVE_ICONV
35 # include <iconv.h>
36 #endif
38 #include "c-ctype.h"
39 #include "po-charset.h"
40 #include "linebreak.h"
41 #include "msgl-ascii.h"
42 #include "write-properties.h"
43 #include "write-stringtable.h"
44 #include "xalloc.h"
45 #include "xallocsa.h"
46 #include "strstr.h"
47 #include "fwriteerror.h"
48 #include "exit.h"
49 #include "error-progname.h"
50 #include "xerror.h"
51 #include "po-error.h"
52 #include "gettext.h"
54 /* Our regular abbreviation. */
55 #define _(str) gettext (str)
57 #if HAVE_DECL_PUTC_UNLOCKED
58 # undef putc
59 # define putc putc_unlocked
60 #endif
63 /* =================== Putting together a #, flags line. =================== */
66 /* Convert IS_FORMAT in the context of programming language LANG to a flag
67 string for use in #, flags. */
69 const char *
70 make_format_description_string (enum is_format is_format, const char *lang,
71 bool debug)
73 static char result[100];
75 switch (is_format)
77 case possible:
78 if (debug)
80 sprintf (result, " possible-%s-format", lang);
81 break;
83 /* FALLTHROUGH */
84 case yes_according_to_context:
85 case yes:
86 sprintf (result, " %s-format", lang);
87 break;
88 case no:
89 sprintf (result, " no-%s-format", lang);
90 break;
91 default:
92 /* The others have already been filtered out by significant_format_p. */
93 abort ();
96 return result;
100 /* Return true if IS_FORMAT is worth mentioning in a #, flags list. */
102 bool
103 significant_format_p (enum is_format is_format)
105 return is_format != undecided && is_format != impossible;
109 /* Return true if one of IS_FORMAT is worth mentioning in a #, flags list. */
111 static bool
112 has_significant_format_p (const enum is_format is_format[NFORMATS])
114 size_t i;
116 for (i = 0; i < NFORMATS; i++)
117 if (significant_format_p (is_format[i]))
118 return true;
119 return false;
123 /* Convert a wrapping flag DO_WRAP to a string for use in #, flags. */
125 static const char *
126 make_c_width_description_string (enum is_wrap do_wrap)
128 const char *result = NULL;
130 switch (do_wrap)
132 case yes:
133 result = " wrap";
134 break;
135 case no:
136 result = " no-wrap";
137 break;
138 default:
139 abort ();
142 return result;
146 /* ================ Output parts of a message, as comments. ================ */
149 /* Output mp->comment as a set of comment lines. */
151 void
152 message_print_comment (const message_ty *mp, FILE *fp)
154 if (mp->comment != NULL)
156 size_t j;
158 for (j = 0; j < mp->comment->nitems; ++j)
160 const char *s = mp->comment->item[j];
163 const char *e;
164 putc ('#', fp);
165 if (*s != '\0' && *s != ' ')
166 putc (' ', fp);
167 e = strchr (s, '\n');
168 if (e == NULL)
170 fputs (s, fp);
171 s = NULL;
173 else
175 fwrite (s, 1, e - s, fp);
176 s = e + 1;
178 putc ('\n', fp);
180 while (s != NULL);
186 /* Output mp->comment_dot as a set of comment lines. */
188 void
189 message_print_comment_dot (const message_ty *mp, FILE *fp)
191 if (mp->comment_dot != NULL)
193 size_t j;
195 for (j = 0; j < mp->comment_dot->nitems; ++j)
197 const char *s = mp->comment_dot->item[j];
198 putc ('#', fp);
199 putc ('.', fp);
200 if (*s != '\0' && *s != ' ')
201 putc (' ', fp);
202 fputs (s, fp);
203 putc ('\n', fp);
209 /* Output mp->filepos as a set of comment lines. */
211 void
212 message_print_comment_filepos (const message_ty *mp, FILE *fp,
213 bool uniforum, size_t page_width)
215 if (mp->filepos_count != 0)
217 if (uniforum)
219 size_t j;
221 for (j = 0; j < mp->filepos_count; ++j)
223 lex_pos_ty *pp = &mp->filepos[j];
224 char *cp = pp->file_name;
225 while (cp[0] == '.' && cp[1] == '/')
226 cp += 2;
227 /* There are two Sun formats to choose from: SunOS and
228 Solaris. Use the Solaris form here. */
229 fprintf (fp, "# File: %s, line: %ld\n",
230 cp, (long) pp->line_number);
233 else
235 size_t column;
236 size_t j;
238 fputs ("#:", fp);
239 column = 2;
240 for (j = 0; j < mp->filepos_count; ++j)
242 lex_pos_ty *pp;
243 char buffer[21];
244 char *cp;
245 size_t len;
247 pp = &mp->filepos[j];
248 cp = pp->file_name;
249 while (cp[0] == '.' && cp[1] == '/')
250 cp += 2;
251 /* Some xgettext input formats, like RST, lack line numbers. */
252 if (pp->line_number == (size_t)(-1))
253 buffer[0] = '\0';
254 else
255 sprintf (buffer, ":%ld", (long) pp->line_number);
256 len = strlen (cp) + strlen (buffer) + 1;
257 if (column > 2 && column + len >= page_width)
259 fputs ("\n#:", fp);
260 column = 2;
262 fprintf (fp, " %s%s", cp, buffer);
263 column += len;
265 putc ('\n', fp);
271 /* Output mp->is_fuzzy, mp->is_format, mp->do_wrap as a comment line. */
273 void
274 message_print_comment_flags (const message_ty *mp, FILE *fp, bool debug)
276 if ((mp->is_fuzzy && mp->msgstr[0] != '\0')
277 || has_significant_format_p (mp->is_format)
278 || mp->do_wrap == no)
280 bool first_flag = true;
281 size_t i;
283 putc ('#', fp);
284 putc (',', fp);
286 /* We don't print the fuzzy flag if the msgstr is empty. This
287 might be introduced by the user but we want to normalize the
288 output. */
289 if (mp->is_fuzzy && mp->msgstr[0] != '\0')
291 fputs (" fuzzy", fp);
292 first_flag = false;
295 for (i = 0; i < NFORMATS; i++)
296 if (significant_format_p (mp->is_format[i]))
298 if (!first_flag)
299 putc (',', fp);
301 fputs (make_format_description_string (mp->is_format[i],
302 format_language[i], debug),
303 fp);
304 first_flag = false;
307 if (mp->do_wrap == no)
309 if (!first_flag)
310 putc (',', fp);
312 fputs (make_c_width_description_string (mp->do_wrap), fp);
313 first_flag = false;
316 putc ('\n', fp);
321 /* =========== Some parameters for use by 'msgdomain_list_print'. ========== */
324 /* This variable controls the page width when printing messages.
325 Defaults to PAGE_WIDTH if not set. Zero (0) given to message_page_-
326 width_set will result in no wrapping being performed. */
327 static size_t page_width = PAGE_WIDTH;
329 void
330 message_page_width_set (size_t n)
332 if (n == 0)
334 page_width = INT_MAX;
335 return;
338 if (n < 20)
339 n = 20;
341 page_width = n;
/* This variable controls the extent to which the page width applies.
   True means it applies to message strings and file reference lines.
   False means it applies to file reference lines only.  */
static bool wrap_strings = true;

/* Stop applying the page width to message strings.  */
void
message_page_width_ignore (void)
{
  wrap_strings = false;
}
/* These three variables control the output style of the message_print
   function.  They are changed only through the interface functions
   below.  */
static bool indent = false;
static bool uniforum = false;
static bool escape = false;

/* Switch on the indented output style.  */
void
message_print_style_indent (void)
{
  indent = true;
}

/* Switch on the Uniforum output style.  */
void
message_print_style_uniforum (void)
{
  uniforum = true;
}

/* Set the 'escape' style flag to FLAG.  */
void
message_print_style_escape (bool flag)
{
  escape = flag;
}
/* Whether to output a file in Java .properties syntax.  */
static bool use_syntax_properties = false;

/* Select Java .properties syntax for the output.  */
void
message_print_syntax_properties (void)
{
  use_syntax_properties = true;
}
/* Whether to output a file in NeXTstep/GNUstep .strings syntax.  */
static bool use_syntax_stringtable = false;

/* Select NeXTstep/GNUstep .strings syntax for the output.  */
void
message_print_syntax_stringtable (void)
{
  use_syntax_stringtable = true;
}
402 /* ================ msgdomain_list_print() and subroutines. ================ */
/* Copy N bytes from SRC to DST, one byte at a time.  Meant for very small
   N (typically 0 or 1).  */
static inline void
memcpy_small (void *dst, const void *src, size_t n)
{
  char *out = (char *) dst;
  const char *in = (const char *) src;
  size_t k;

  if (n == 0)
    return;

  /* The first byte is handled on its own; the loop only runs for n > 1.  */
  out[0] = in[0];
  for (k = 1; k < n; k++)
    out[k] = in[k];
}
421 static void
422 wrap (FILE *fp, const char *line_prefix, const char *name, const char *value,
423 enum is_wrap do_wrap, const char *charset)
425 const char *canon_charset;
426 const char *s;
427 bool first_line;
428 #if HAVE_ICONV
429 const char *envval;
430 iconv_t conv;
431 #endif
432 bool weird_cjk;
434 canon_charset = po_charset_canonicalize (charset);
436 #if HAVE_ICONV
437 /* The old Solaris/openwin msgfmt and GNU msgfmt <= 0.10.35 don't know
438 about multibyte encodings, and require a spurious backslash after
439 every multibyte character whose last byte is 0x5C. Some programs,
440 like vim, distribute PO files in this broken format. It is important
441 for such programs that GNU msgmerge continues to support this old
442 PO file format when the Makefile requests it. */
443 envval = getenv ("OLD_PO_FILE_OUTPUT");
444 if (envval != NULL && *envval != '\0')
445 /* Write a PO file in old format, with extraneous backslashes. */
446 conv = (iconv_t)(-1);
447 else
448 if (canon_charset == NULL)
449 /* Invalid PO file encoding. */
450 conv = (iconv_t)(-1);
451 else
452 /* Avoid glibc-2.1 bug with EUC-KR. */
453 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
454 if (strcmp (canon_charset, "EUC-KR") == 0)
455 conv = (iconv_t)(-1);
456 else
457 # endif
458 /* Avoid Solaris 2.9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, GBK,
459 GB18030. */
460 # if defined __sun && !defined _LIBICONV_VERSION
461 if ( strcmp (canon_charset, "GB2312") == 0
462 || strcmp (canon_charset, "EUC-TW") == 0
463 || strcmp (canon_charset, "BIG5") == 0
464 || strcmp (canon_charset, "BIG5-HKSCS") == 0
465 || strcmp (canon_charset, "GBK") == 0
466 || strcmp (canon_charset, "GB18030") == 0)
467 conv = (iconv_t)(-1);
468 else
469 # endif
470 /* Use iconv() to parse multibyte characters. */
471 conv = iconv_open ("UTF-8", canon_charset);
473 if (conv != (iconv_t)(-1))
474 weird_cjk = false;
475 else
476 #endif
477 if (canon_charset == NULL)
478 weird_cjk = false;
479 else
480 weird_cjk = po_is_charset_weird_cjk (canon_charset);
482 if (canon_charset == NULL)
483 canon_charset = po_charset_ascii;
485 /* Loop over the '\n' delimited portions of value. */
486 s = value;
487 first_line = true;
490 /* The \a and \v escapes were added by the ANSI C Standard.
491 Prior to the Standard, most compilers did not have them.
492 Because we need the same program on all platforms we don't provide
493 support for them here. Thus we only support \b\f\n\r\t. */
494 # define is_escape(c) \
495 ((c) == '\b' || (c) == '\f' || (c) == '\n' || (c) == '\r' || (c) == '\t')
497 const char *es;
498 const char *ep;
499 size_t portion_len;
500 char *portion;
501 char *overrides;
502 char *linebreaks;
503 char *pp;
504 char *op;
505 int startcol, startcol_after_break, width;
506 size_t i;
508 for (es = s; *es != '\0'; )
509 if (*es++ == '\n')
510 break;
512 /* Expand escape sequences in each portion. */
513 for (ep = s, portion_len = 0; ep < es; ep++)
515 char c = *ep;
516 if (is_escape (c))
517 portion_len += 2;
518 else if (escape && !c_isprint ((unsigned char) c))
519 portion_len += 4;
520 else if (c == '\\' || c == '"')
521 portion_len += 2;
522 else
524 #if HAVE_ICONV
525 if (conv != (iconv_t)(-1))
527 /* Skip over a complete multi-byte character. Don't
528 interpret the second byte of a multi-byte character as
529 ASCII. This is needed for the BIG5, BIG5-HKSCS, GBK,
530 GB18030, SHIFT_JIS, JOHAB encodings. */
531 char scratchbuf[64];
532 const char *inptr = ep;
533 size_t insize;
534 char *outptr = &scratchbuf[0];
535 size_t outsize = sizeof (scratchbuf);
536 size_t res;
538 res = (size_t)(-1);
539 for (insize = 1; inptr + insize <= es; insize++)
541 res = iconv (conv,
542 (ICONV_CONST char **) &inptr, &insize,
543 &outptr, &outsize);
544 if (!(res == (size_t)(-1) && errno == EINVAL))
545 break;
546 /* We expect that no input bytes have been consumed
547 so far. */
548 if (inptr != ep)
549 abort ();
551 if (res == (size_t)(-1))
553 if (errno == EILSEQ)
555 po_error (0, 0, _("invalid multibyte sequence"));
556 continue;
558 else
559 abort ();
561 insize = inptr - ep;
562 portion_len += insize;
563 ep += insize - 1;
565 else
566 #endif
568 if (weird_cjk
569 /* Special handling of encodings with CJK structure. */
570 && ep + 2 <= es
571 && (unsigned char) ep[0] >= 0x80
572 && (unsigned char) ep[1] >= 0x30)
574 portion_len += 2;
575 ep += 1;
577 else
578 portion_len += 1;
582 portion = (char *) xmalloc (portion_len);
583 overrides = (char *) xmalloc (portion_len);
584 memset (overrides, UC_BREAK_UNDEFINED, portion_len);
585 for (ep = s, pp = portion, op = overrides; ep < es; ep++)
587 char c = *ep;
588 if (is_escape (c))
590 switch (c)
592 case '\b': c = 'b'; break;
593 case '\f': c = 'f'; break;
594 case '\n': c = 'n'; break;
595 case '\r': c = 'r'; break;
596 case '\t': c = 't'; break;
597 default: abort ();
599 *pp++ = '\\';
600 *pp++ = c;
601 op++;
602 *op++ = UC_BREAK_PROHIBITED;
603 /* We warn about any use of escape sequences beside
604 '\n' and '\t'. */
605 if (c != 'n' && c != 't')
606 po_error (0, 0, _("\
607 internationalized messages should not contain the `\\%c' escape sequence"),
610 else if (escape && !c_isprint ((unsigned char) c))
612 *pp++ = '\\';
613 *pp++ = '0' + (((unsigned char) c >> 6) & 7);
614 *pp++ = '0' + (((unsigned char) c >> 3) & 7);
615 *pp++ = '0' + ((unsigned char) c & 7);
616 op++;
617 *op++ = UC_BREAK_PROHIBITED;
618 *op++ = UC_BREAK_PROHIBITED;
619 *op++ = UC_BREAK_PROHIBITED;
621 else if (c == '\\' || c == '"')
623 *pp++ = '\\';
624 *pp++ = c;
625 op++;
626 *op++ = UC_BREAK_PROHIBITED;
628 else
630 #if HAVE_ICONV
631 if (conv != (iconv_t)(-1))
633 /* Copy a complete multi-byte character. Don't
634 interpret the second byte of a multi-byte character as
635 ASCII. This is needed for the BIG5, BIG5-HKSCS, GBK,
636 GB18030, SHIFT_JIS, JOHAB encodings. */
637 char scratchbuf[64];
638 const char *inptr = ep;
639 size_t insize;
640 char *outptr = &scratchbuf[0];
641 size_t outsize = sizeof (scratchbuf);
642 size_t res;
644 res = (size_t)(-1);
645 for (insize = 1; inptr + insize <= es; insize++)
647 res = iconv (conv,
648 (ICONV_CONST char **) &inptr, &insize,
649 &outptr, &outsize);
650 if (!(res == (size_t)(-1) && errno == EINVAL))
651 break;
652 /* We expect that no input bytes have been consumed
653 so far. */
654 if (inptr != ep)
655 abort ();
657 if (res == (size_t)(-1))
659 if (errno == EILSEQ)
661 po_error (0, 0, _("invalid multibyte sequence"));
662 continue;
664 else
665 abort ();
667 insize = inptr - ep;
668 memcpy_small (pp, ep, insize);
669 pp += insize;
670 op += insize;
671 ep += insize - 1;
673 else
674 #endif
676 if (weird_cjk
677 /* Special handling of encodings with CJK structure. */
678 && ep + 2 <= es
679 && (unsigned char) c >= 0x80
680 && (unsigned char) ep[1] >= 0x30)
682 *pp++ = c;
683 ep += 1;
684 *pp++ = *ep;
685 op += 2;
687 else
689 *pp++ = c;
690 op++;
696 /* Don't break immediately before the "\n" at the end. */
697 if (es > s && es[-1] == '\n')
698 overrides[portion_len - 2] = UC_BREAK_PROHIBITED;
700 linebreaks = (char *) xmalloc (portion_len);
702 /* Subsequent lines after a break are all indented.
703 See INDENT-S. */
704 startcol_after_break = (line_prefix ? strlen (line_prefix) : 0);
705 if (indent)
706 startcol_after_break = (startcol_after_break + 8) & ~7;
707 startcol_after_break++;
709 /* The line width. Allow room for the closing quote character. */
710 width = (wrap_strings && do_wrap != no ? page_width : INT_MAX) - 1;
711 /* Adjust for indentation of subsequent lines. */
712 width -= startcol_after_break;
714 recompute:
715 /* The line starts with different things depending on whether it
716 is the first line, and if we are using the indented style.
717 See INDENT-F. */
718 startcol = (line_prefix ? strlen (line_prefix) : 0);
719 if (first_line)
721 startcol += strlen (name);
722 if (indent)
723 startcol = (startcol + 8) & ~7;
724 else
725 startcol++;
727 else
729 if (indent)
730 startcol = (startcol + 8) & ~7;
732 /* Allow room for the opening quote character. */
733 startcol++;
734 /* Adjust for indentation of subsequent lines. */
735 startcol -= startcol_after_break;
737 /* Do line breaking on the portion. */
738 mbs_width_linebreaks (portion, portion_len, width, startcol, 0,
739 overrides, canon_charset, linebreaks);
741 /* If this is the first line, and we are not using the indented
742 style, and the line would wrap, then use an empty first line
743 and restart. */
744 if (first_line && !indent
745 && portion_len > 0
746 && (*es != '\0'
747 || startcol > width
748 || memchr (linebreaks, UC_BREAK_POSSIBLE, portion_len) != NULL))
750 if (line_prefix != NULL)
751 fputs (line_prefix, fp);
752 fputs (name, fp);
753 fputs (" \"\"\n", fp);
754 first_line = false;
755 /* Recompute startcol and linebreaks. */
756 goto recompute;
759 /* Print the beginning of the line. This will depend on whether
760 this is the first line, and if the indented style is being
761 used. INDENT-F. */
762 if (line_prefix != NULL)
763 fputs (line_prefix, fp);
764 if (first_line)
766 fputs (name, fp);
767 putc (indent ? '\t' : ' ', fp);
768 first_line = false;
770 else
772 if (indent)
773 putc ('\t', fp);
776 /* Print the portion itself, with linebreaks where necessary. */
777 putc ('"', fp);
778 for (i = 0; i < portion_len; i++)
780 if (linebreaks[i] == UC_BREAK_POSSIBLE)
782 fputs ("\"\n", fp);
783 /* INDENT-S. */
784 if (line_prefix != NULL)
785 fputs (line_prefix, fp);
786 if (indent)
787 putc ('\t', fp);
788 putc ('"', fp);
790 putc (portion[i], fp);
792 fputs ("\"\n", fp);
794 free (linebreaks);
795 free (overrides);
796 free (portion);
798 s = es;
799 # undef is_escape
801 while (*s);
803 #if HAVE_ICONV
804 if (conv != (iconv_t)(-1))
805 iconv_close (conv);
806 #endif
810 static void
811 print_blank_line (FILE *fp)
813 if (uniforum)
814 fputs ("#\n", fp);
815 else
816 putc ('\n', fp);
820 static void
821 message_print (const message_ty *mp, FILE *fp, const char *charset,
822 bool blank_line, bool debug)
824 /* Separate messages with a blank line. Uniforum doesn't like blank
825 lines, so use an empty comment (unless there already is one). */
826 if (blank_line && (!uniforum
827 || mp->comment == NULL
828 || mp->comment->nitems == 0
829 || mp->comment->item[0][0] != '\0'))
830 print_blank_line (fp);
832 /* Print translator comment if available. */
833 message_print_comment (mp, fp);
835 /* Print xgettext extracted comments. */
836 message_print_comment_dot (mp, fp);
838 /* Print the file position comments. This will help a human who is
839 trying to navigate the sources. There is no problem of getting
840 repeated positions, because duplicates are checked for. */
841 message_print_comment_filepos (mp, fp, uniforum, page_width);
843 /* Print flag information in special comment. */
844 message_print_comment_flags (mp, fp, debug);
846 /* Print each of the message components. Wrap them nicely so they
847 are as readable as possible. If there is no recorded msgstr for
848 this domain, emit an empty string. */
849 if (!is_ascii_string (mp->msgid)
850 && po_charset_canonicalize (charset) != po_charset_utf8)
851 po_multiline_warning (xasprintf (_("warning: ")),
852 xasprintf (_("\
853 The following msgid contains non-ASCII characters.\n\
854 This will cause problems to translators who use a character encoding\n\
855 different from yours. Consider using a pure ASCII msgid instead.\n\
856 %s\n"), mp->msgid));
857 wrap (fp, NULL, "msgid", mp->msgid, mp->do_wrap, charset);
858 if (mp->msgid_plural != NULL)
859 wrap (fp, NULL, "msgid_plural", mp->msgid_plural, mp->do_wrap, charset);
861 if (mp->msgid_plural == NULL)
862 wrap (fp, NULL, "msgstr", mp->msgstr, mp->do_wrap, charset);
863 else
865 char prefix_buf[20];
866 unsigned int i;
867 const char *p;
869 for (p = mp->msgstr, i = 0;
870 p < mp->msgstr + mp->msgstr_len;
871 p += strlen (p) + 1, i++)
873 sprintf (prefix_buf, "msgstr[%u]", i);
874 wrap (fp, NULL, prefix_buf, p, mp->do_wrap, charset);
880 static void
881 message_print_obsolete (const message_ty *mp, FILE *fp, const char *charset,
882 bool blank_line)
884 /* If msgstr is the empty string we print nothing. */
885 if (mp->msgstr[0] == '\0')
886 return;
888 /* Separate messages with a blank line. Uniforum doesn't like blank
889 lines, so use an empty comment (unless there already is one). */
890 if (blank_line)
891 print_blank_line (fp);
893 /* Print translator comment if available. */
894 message_print_comment (mp, fp);
896 /* Print flag information in special comment. */
897 if (mp->is_fuzzy)
899 bool first = true;
901 putc ('#', fp);
902 putc (',', fp);
904 if (mp->is_fuzzy)
906 fputs (" fuzzy", fp);
907 first = false;
910 putc ('\n', fp);
913 /* Print each of the message components. Wrap them nicely so they
914 are as readable as possible. */
915 if (!is_ascii_string (mp->msgid)
916 && po_charset_canonicalize (charset) != po_charset_utf8)
917 po_multiline_warning (xasprintf (_("warning: ")),
918 xasprintf (_("\
919 The following msgid contains non-ASCII characters.\n\
920 This will cause problems to translators who use a character encoding\n\
921 different from yours. Consider using a pure ASCII msgid instead.\n\
922 %s\n"), mp->msgid));
923 wrap (fp, "#~ ", "msgid", mp->msgid, mp->do_wrap, charset);
924 if (mp->msgid_plural != NULL)
925 wrap (fp, "#~ ", "msgid_plural", mp->msgid_plural, mp->do_wrap, charset);
927 if (mp->msgid_plural == NULL)
928 wrap (fp, "#~ ", "msgstr", mp->msgstr, mp->do_wrap, charset);
929 else
931 char prefix_buf[20];
932 unsigned int i;
933 const char *p;
935 for (p = mp->msgstr, i = 0;
936 p < mp->msgstr + mp->msgstr_len;
937 p += strlen (p) + 1, i++)
939 sprintf (prefix_buf, "msgstr[%u]", i);
940 wrap (fp, "#~ ", prefix_buf, p, mp->do_wrap, charset);
946 static void
947 msgdomain_list_print_po (msgdomain_list_ty *mdlp, FILE *fp, bool debug)
949 size_t j, k;
950 bool blank_line;
952 /* Write out the messages for each domain. */
953 blank_line = false;
954 for (k = 0; k < mdlp->nitems; k++)
956 message_list_ty *mlp;
957 const char *header;
958 char *charset;
959 char *allocated_charset;
961 /* If the first domain is the default, don't bother emitting
962 the domain name, because it is the default. */
963 if (!(k == 0
964 && strcmp (mdlp->item[k]->domain, MESSAGE_DOMAIN_DEFAULT) == 0))
966 if (blank_line)
967 print_blank_line (fp);
968 fprintf (fp, "domain \"%s\"\n", mdlp->item[k]->domain);
969 blank_line = true;
972 mlp = mdlp->item[k]->messages;
974 /* Search the header entry. */
975 header = NULL;
976 for (j = 0; j < mlp->nitems; ++j)
977 if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
979 header = mlp->item[j]->msgstr;
980 break;
983 /* Extract the charset name. */
984 charset = "ASCII";
985 allocated_charset = NULL;
986 if (header != NULL)
988 const char *charsetstr = strstr (header, "charset=");
990 if (charsetstr != NULL)
992 size_t len;
994 charsetstr += strlen ("charset=");
995 len = strcspn (charsetstr, " \t\n");
996 charset = allocated_charset = (char *) xallocsa (len + 1);
997 memcpy (charset, charsetstr, len);
998 charset[len] = '\0';
1000 /* Treat the dummy default value as if it were absent. */
1001 if (strcmp (charset, "CHARSET") == 0)
1002 charset = "ASCII";
1006 /* Write out each of the messages for this domain. */
1007 for (j = 0; j < mlp->nitems; ++j)
1008 if (!mlp->item[j]->obsolete)
1010 message_print (mlp->item[j], fp, charset, blank_line, debug);
1011 blank_line = true;
1014 /* Write out each of the obsolete messages for this domain. */
1015 for (j = 0; j < mlp->nitems; ++j)
1016 if (mlp->item[j]->obsolete)
1018 message_print_obsolete (mlp->item[j], fp, charset, blank_line);
1019 blank_line = true;
1022 if (allocated_charset != NULL)
1023 freesa (allocated_charset);
1028 void
1029 msgdomain_list_print (msgdomain_list_ty *mdlp, const char *filename,
1030 bool force, bool debug)
1032 FILE *fp;
1034 /* We will not write anything if, for every domain, we have no message
1035 or only the header entry. */
1036 if (!force)
1038 bool found_nonempty = false;
1039 size_t k;
1041 for (k = 0; k < mdlp->nitems; k++)
1043 message_list_ty *mlp = mdlp->item[k]->messages;
1045 if (!(mlp->nitems == 0
1046 || (mlp->nitems == 1 && mlp->item[0]->msgid[0] == '\0')))
1048 found_nonempty = true;
1049 break;
1053 if (!found_nonempty)
1054 return;
1057 /* Check whether the output format can accomodate all messages. */
1058 if (use_syntax_properties || use_syntax_stringtable)
1060 if (mdlp->nitems > 1)
1062 if (use_syntax_properties)
1063 po_error (EXIT_FAILURE, 0, _("Cannot output multiple translation domains into a single file with Java .properties syntax. Try using PO file syntax instead."));
1064 if (use_syntax_stringtable)
1065 po_error (EXIT_FAILURE, 0, _("Cannot output multiple translation domains into a single file with NeXTstep/GNUstep .strings syntax."));
1067 if (mdlp->nitems == 1)
1069 message_list_ty *mlp = mdlp->item[0]->messages;
1070 const lex_pos_ty *has_plural;
1071 size_t j;
1073 has_plural = NULL;
1074 for (j = 0; j < mlp->nitems; j++)
1076 message_ty *mp = mlp->item[j];
1078 if (mp->msgid_plural != NULL)
1080 has_plural = &mp->pos;
1081 break;
1085 if (has_plural != NULL)
1087 error_with_progname = false;
1088 if (use_syntax_properties)
1089 po_error_at_line (EXIT_FAILURE, 0,
1090 has_plural->file_name, has_plural->line_number,
1091 _("message catalog has plural form translations, but the output format does not support them. Try generating a Java class using \"msgfmt --java\", instead of a properties file."));
1092 if (use_syntax_stringtable)
1093 po_error_at_line (EXIT_FAILURE, 0,
1094 has_plural->file_name, has_plural->line_number,
1095 _("message catalog has plural form translations, but the output format does not support them."));
1096 error_with_progname = true;
1101 /* Open the output file. */
1102 if (filename != NULL && strcmp (filename, "-") != 0
1103 && strcmp (filename, "/dev/stdout") != 0)
1105 fp = fopen (filename, "w");
1106 if (fp == NULL)
1107 po_error (EXIT_FAILURE, errno, _("cannot create output file \"%s\""),
1108 filename);
1110 else
1112 fp = stdout;
1113 /* xgettext:no-c-format */
1114 filename = _("standard output");
1117 if (use_syntax_properties)
1118 msgdomain_list_print_properties (mdlp, fp, page_width, debug);
1119 else if (use_syntax_stringtable)
1120 msgdomain_list_print_stringtable (mdlp, fp, page_width, debug);
1121 else
1122 msgdomain_list_print_po (mdlp, fp, debug);
1124 /* Make sure nothing went wrong. */
1125 if (fwriteerror (fp))
1126 po_error (EXIT_FAILURE, errno, _("error while writing \"%s\" file"),
1127 filename);
1131 /* =============================== Sorting. ================================ */
1134 static int
1135 cmp_by_msgid (const void *va, const void *vb)
1137 const message_ty *a = *(const message_ty **) va;
1138 const message_ty *b = *(const message_ty **) vb;
1139 /* Because msgids normally contain only ASCII characters, it is OK to
1140 sort them as if we were in the C locale. And strcoll() in the C locale
1141 is the same as strcmp(). */
1142 return strcmp (a->msgid, b->msgid);
1146 void
1147 msgdomain_list_sort_by_msgid (msgdomain_list_ty *mdlp)
1149 size_t k;
1151 for (k = 0; k < mdlp->nitems; k++)
1153 message_list_ty *mlp = mdlp->item[k]->messages;
1155 if (mlp->nitems > 0)
1156 qsort (mlp->item, mlp->nitems, sizeof (mlp->item[0]), cmp_by_msgid);
1161 /* Sort the file positions of every message. */
1163 static int
1164 cmp_filepos (const void *va, const void *vb)
1166 const lex_pos_ty *a = (const lex_pos_ty *) va;
1167 const lex_pos_ty *b = (const lex_pos_ty *) vb;
1168 int cmp;
1170 cmp = strcmp (a->file_name, b->file_name);
1171 if (cmp == 0)
1172 cmp = (int) a->line_number - (int) b->line_number;
1174 return cmp;
1177 static void
1178 msgdomain_list_sort_filepos (msgdomain_list_ty *mdlp)
1180 size_t j, k;
1182 for (k = 0; k < mdlp->nitems; k++)
1184 message_list_ty *mlp = mdlp->item[k]->messages;
1186 for (j = 0; j < mlp->nitems; j++)
1188 message_ty *mp = mlp->item[j];
1190 if (mp->filepos_count > 0)
1191 qsort (mp->filepos, mp->filepos_count, sizeof (mp->filepos[0]),
1192 cmp_filepos);
1198 /* Sort the messages according to the file position. */
1200 static int
1201 cmp_by_filepos (const void *va, const void *vb)
1203 const message_ty *a = *(const message_ty **) va;
1204 const message_ty *b = *(const message_ty **) vb;
1205 int cmp;
1207 /* No filepos is smaller than any other filepos. */
1208 if (a->filepos_count == 0)
1210 if (b->filepos_count != 0)
1211 return -1;
1213 if (b->filepos_count == 0)
1214 return 1;
1216 /* Compare on the file names... */
1217 cmp = strcmp (a->filepos[0].file_name, b->filepos[0].file_name);
1218 if (cmp != 0)
1219 return cmp;
1221 /* If they are equal, compare on the line numbers... */
1222 cmp = a->filepos[0].line_number - b->filepos[0].line_number;
1223 if (cmp != 0)
1224 return cmp;
1226 /* If they are equal, compare on the msgid strings. */
1227 /* Because msgids normally contain only ASCII characters, it is OK to
1228 sort them as if we were in the C locale. And strcoll() in the C locale
1229 is the same as strcmp(). */
1230 return strcmp (a->msgid, b->msgid);
1234 void
1235 msgdomain_list_sort_by_filepos (msgdomain_list_ty *mdlp)
1237 size_t k;
1239 /* It makes sense to compare filepos[0] of different messages only after
1240 the filepos[] array of each message has been sorted. Sort it now. */
1241 msgdomain_list_sort_filepos (mdlp);
1243 for (k = 0; k < mdlp->nitems; k++)
1245 message_list_ty *mlp = mdlp->item[k]->messages;
1247 if (mlp->nitems > 0)
1248 qsort (mlp->item, mlp->nitems, sizeof (mlp->item[0]), cmp_by_filepos);