gnu/dist/gettext/gettext-tools/src/write-po.c

   1 /* GNU gettext - internationalization aids
   2    Copyright (C) 1995-1998, 2000-2005 Free Software Foundation, Inc.
   3
   4    This file was written by Peter Miller <millerp@canb.auug.org.au>
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software Foundation,
  18    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23 #include <alloca.h>
  24
  25 /* Specification.  */
  26 #include "write-po.h"
  27
  28 #include <errno.h>
  29 #include <limits.h>
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33
  34 #if HAVE_ICONV
  35 # include <iconv.h>
  36 #endif
  37
  38 #include "c-ctype.h"
  39 #include "po-charset.h"
  40 #include "linebreak.h"
  41 #include "msgl-ascii.h"
  42 #include "write-properties.h"
  43 #include "write-stringtable.h"
  44 #include "xalloc.h"
  45 #include "xallocsa.h"
  46 #include "strstr.h"
  47 #include "fwriteerror.h"
  48 #include "exit.h"
  49 #include "error-progname.h"
  50 #include "xerror.h"
  51 #include "po-error.h"
  52 #include "gettext.h"
  53
  54 /* Our regular abbreviation.  */
  55 #define _(str) gettext (str)
  56
  57 #if HAVE_DECL_PUTC_UNLOCKED
  58 # undef putc
  59 # define putc putc_unlocked
  60 #endif
  61
  62
  63 /* =================== Putting together a #, flags line. =================== */
  64
  65
  66 /* Convert IS_FORMAT in the context of programming language LANG to a flag
  67    string for use in #, flags.  */
  68
  69 const char *
  70 make_format_description_string (enum is_format is_format, const char *lang,
  71                                 bool debug)
  72 {
  73   static char result[100];
  74
  75   switch (is_format)
  76     {
  77     case possible:
  78       if (debug)
  79         {
  80           sprintf (result, " possible-%s-format", lang);
  81           break;
  82         }
  83       /* FALLTHROUGH */
  84     case yes_according_to_context:
  85     case yes:
  86       sprintf (result, " %s-format", lang);
  87       break;
  88     case no:
  89       sprintf (result, " no-%s-format", lang);
  90       break;
  91     default:
  92       /* The others have already been filtered out by significant_format_p.  */
  93       abort ();
  94     }
  95
  96   return result;
  97 }
  98
  99
 100 /* Return true if IS_FORMAT is worth mentioning in a #, flags list.  */
 101
 102 bool
 103 significant_format_p (enum is_format is_format)
 104 {
 105   return is_format != undecided && is_format != impossible;
 106 }
 107
 108
 109 /* Return true if one of IS_FORMAT is worth mentioning in a #, flags list.  */
 110
 111 static bool
 112 has_significant_format_p (const enum is_format is_format[NFORMATS])
 113 {
 114   size_t i;
 115
 116   for (i = 0; i < NFORMATS; i++)
 117     if (significant_format_p (is_format[i]))
 118       return true;
 119   return false;
 120 }
 121
 122
 123 /* Convert a wrapping flag DO_WRAP to a string for use in #, flags.  */
 124
 125 static const char *
 126 make_c_width_description_string (enum is_wrap do_wrap)
 127 {
 128   const char *result = NULL;
 129
 130   switch (do_wrap)
 131     {
 132     case yes:
 133       result = " wrap";
 134       break;
 135     case no:
 136       result = " no-wrap";
 137       break;
 138     default:
 139       abort ();
 140     }
 141
 142   return result;
 143 }
 144
 145
 146 /* ================ Output parts of a message, as comments. ================ */
 147
 148
 149 /* Output mp->comment as a set of comment lines.  */
 150
 151 void
 152 message_print_comment (const message_ty *mp, FILE *fp)
 153 {
 154   if (mp->comment != NULL)
 155     {
 156       size_t j;
 157
 158       for (j = 0; j < mp->comment->nitems; ++j)
 159         {
 160           const char *s = mp->comment->item[j];
 161           do
 162             {
 163               const char *e;
 164               putc ('#', fp);
 165               if (*s != '\0' && *s != ' ')
 166                 putc (' ', fp);
 167               e = strchr (s, '\n');
 168               if (e == NULL)
 169                 {
 170                   fputs (s, fp);
 171                   s = NULL;
 172                 }
 173               else
 174                 {
 175                   fwrite (s, 1, e - s, fp);
 176                   s = e + 1;
 177                 }
 178               putc ('\n', fp);
 179             }
 180           while (s != NULL);
 181         }
 182     }
 183 }
 184
 185
 186 /* Output mp->comment_dot as a set of comment lines.  */
 187
 188 void
 189 message_print_comment_dot (const message_ty *mp, FILE *fp)
 190 {
 191   if (mp->comment_dot != NULL)
 192     {
 193       size_t j;
 194
 195       for (j = 0; j < mp->comment_dot->nitems; ++j)
 196         {
 197           const char *s = mp->comment_dot->item[j];
 198           putc ('#', fp);
 199           putc ('.', fp);
 200           if (*s != '\0' && *s != ' ')
 201             putc (' ', fp);
 202           fputs (s, fp);
 203           putc ('\n', fp);
 204         }
 205     }
 206 }
 207
 208
 209 /* Output mp->filepos as a set of comment lines.  */
 210
 211 void
 212 message_print_comment_filepos (const message_ty *mp, FILE *fp,
 213                                bool uniforum, size_t page_width)
 214 {
 215   if (mp->filepos_count != 0)
 216     {
 217       if (uniforum)
 218         {
 219           size_t j;
 220
 221           for (j = 0; j < mp->filepos_count; ++j)
 222             {
 223               lex_pos_ty *pp = &mp->filepos[j];
 224               char *cp = pp->file_name;
 225               while (cp[0] == '.' && cp[1] == '/')
 226                 cp += 2;
 227               /* There are two Sun formats to choose from: SunOS and
 228                  Solaris.  Use the Solaris form here.  */
 229               fprintf (fp, "# File: %s, line: %ld\n",
 230                        cp, (long) pp->line_number);
 231             }
 232         }
 233       else
 234         {
 235           size_t column;
 236           size_t j;
 237
 238           fputs ("#:", fp);
 239           column = 2;
 240           for (j = 0; j < mp->filepos_count; ++j)
 241             {
 242               lex_pos_ty *pp;
 243               char buffer[21];
 244               char *cp;
 245               size_t len;
 246
 247               pp = &mp->filepos[j];
 248               cp = pp->file_name;
 249               while (cp[0] == '.' && cp[1] == '/')
 250                 cp += 2;
 251               /* Some xgettext input formats, like RST, lack line numbers.  */
 252               if (pp->line_number == (size_t)(-1))
 253                 buffer[0] = '\0';
 254               else
 255                 sprintf (buffer, ":%ld", (long) pp->line_number);
 256               len = strlen (cp) + strlen (buffer) + 1;
 257               if (column > 2 && column + len >= page_width)
 258                 {
 259                   fputs ("\n#:", fp);
 260                   column = 2;
 261                 }
 262               fprintf (fp, " %s%s", cp, buffer);
 263               column += len;
 264             }
 265           putc ('\n', fp);
 266         }
 267     }
 268 }
 269
 270
 271 /* Output mp->is_fuzzy, mp->is_format, mp->do_wrap as a comment line.  */
 272
 273 void
 274 message_print_comment_flags (const message_ty *mp, FILE *fp, bool debug)
 275 {
 276   if ((mp->is_fuzzy && mp->msgstr[0] != '\0')
 277       || has_significant_format_p (mp->is_format)
 278       || mp->do_wrap == no)
 279     {
 280       bool first_flag = true;
 281       size_t i;
 282
 283       putc ('#', fp);
 284       putc (',', fp);
 285
 286       /* We don't print the fuzzy flag if the msgstr is empty.  This
 287          might be introduced by the user but we want to normalize the
 288          output.  */
 289       if (mp->is_fuzzy && mp->msgstr[0] != '\0')
 290         {
 291           fputs (" fuzzy", fp);
 292           first_flag = false;
 293         }
 294
 295       for (i = 0; i < NFORMATS; i++)
 296         if (significant_format_p (mp->is_format[i]))
 297           {
 298             if (!first_flag)
 299               putc (',', fp);
 300
 301             fputs (make_format_description_string (mp->is_format[i],
 302                                                    format_language[i], debug),
 303                    fp);
 304             first_flag = false;
 305           }
 306
 307       if (mp->do_wrap == no)
 308         {
 309           if (!first_flag)
 310             putc (',', fp);
 311
 312           fputs (make_c_width_description_string (mp->do_wrap), fp);
 313           first_flag = false;
 314         }
 315
 316       putc ('\n', fp);
 317     }
 318 }
 319
 320
 321 /* =========== Some parameters for use by 'msgdomain_list_print'. ========== */
 322
 323
 324 /* This variable controls the page width when printing messages.
 325    Defaults to PAGE_WIDTH if not set.  Zero (0) given to message_page_-
 326    width_set will result in no wrapping being performed.  */
 327 static size_t page_width = PAGE_WIDTH;
 328
 329 void
 330 message_page_width_set (size_t n)
 331 {
 332   if (n == 0)
 333     {
 334       page_width = INT_MAX;
 335       return;
 336     }
 337
 338   if (n < 20)
 339     n = 20;
 340
 341   page_width = n;
 342 }
 343
 344
 345 /* This variable controls the extent to which the page width applies.
 346    True means it applies to message strings and file reference lines.
 347    False means it applies to file reference lines only.  */
 348 static bool wrap_strings = true;
 349
 350 void
 351 message_page_width_ignore ()
 352 {
 353   wrap_strings = false;
 354 }
 355
 356
 357 /* These three variables control the output style of the message_print
 358    function.  Interface functions for them are to be used.  */
 359 static bool indent = false;
 360 static bool uniforum = false;
 361 static bool escape = false;
 362
 363 void
 364 message_print_style_indent ()
 365 {
 366   indent = true;
 367 }
 368
 369 void
 370 message_print_style_uniforum ()
 371 {
 372   uniforum = true;
 373 }
 374
 375 void
 376 message_print_style_escape (bool flag)
 377 {
 378   escape = flag;
 379 }
 380
 381
 382 /* Whether to output a file in Java .properties syntax.  */
 383 static bool use_syntax_properties = false;
 384
 385 void
 386 message_print_syntax_properties ()
 387 {
 388   use_syntax_properties = true;
 389 }
 390
 391
 392 /* Whether to output a file in NeXTstep/GNUstep .strings syntax.  */
 393 static bool use_syntax_stringtable = false;
 394
 395 void
 396 message_print_syntax_stringtable ()
 397 {
 398   use_syntax_stringtable = true;
 399 }
 400
 401
 402 /* ================ msgdomain_list_print() and subroutines. ================ */
 403
 404
 405 /* A version of memcpy optimized for the case n <= 1.  */
 406 static inline void
 407 memcpy_small (void *dst, const void *src, size_t n)
 408 {
 409   if (n > 0)
 410     {
 411       char *q = (char *) dst;
 412       const char *p = (const char *) src;
 413
 414       *q = *p;
 415       if (--n > 0)
 416         do *++q = *++p; while (--n > 0);
 417     }
 418 }
 419
 420
 421 static void
 422 wrap (FILE *fp, const char *line_prefix, const char *name, const char *value,
 423       enum is_wrap do_wrap, const char *charset)
 424 {
 425   const char *canon_charset;
 426   const char *s;
 427   bool first_line;
 428 #if HAVE_ICONV
 429   const char *envval;
 430   iconv_t conv;
 431 #endif
 432   bool weird_cjk;
 433
 434   canon_charset = po_charset_canonicalize (charset);
 435
 436 #if HAVE_ICONV
 437   /* The old Solaris/openwin msgfmt and GNU msgfmt <= 0.10.35 don't know
 438      about multibyte encodings, and require a spurious backslash after
 439      every multibyte character whose last byte is 0x5C.  Some programs,
 440      like vim, distribute PO files in this broken format.  It is important
 441      for such programs that GNU msgmerge continues to support this old
 442      PO file format when the Makefile requests it.  */
 443   envval = getenv ("OLD_PO_FILE_OUTPUT");
 444   if (envval != NULL && *envval != '\0')
 445     /* Write a PO file in old format, with extraneous backslashes.  */
 446     conv = (iconv_t)(-1);
 447   else
 448     if (canon_charset == NULL)
 449       /* Invalid PO file encoding.  */
 450       conv = (iconv_t)(-1);
 451     else
 452       /* Avoid glibc-2.1 bug with EUC-KR.  */
 453 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
 454       if (strcmp (canon_charset, "EUC-KR") == 0)
 455         conv = (iconv_t)(-1);
 456       else
 457 # endif
 458       /* Avoid Solaris 2.9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS, GBK,
 459          GB18030.  */
 460 # if defined __sun && !defined _LIBICONV_VERSION
 461       if (   strcmp (canon_charset, "GB2312") == 0
 462           || strcmp (canon_charset, "EUC-TW") == 0
 463           || strcmp (canon_charset, "BIG5") == 0
 464           || strcmp (canon_charset, "BIG5-HKSCS") == 0
 465           || strcmp (canon_charset, "GBK") == 0
 466           || strcmp (canon_charset, "GB18030") == 0)
 467         conv = (iconv_t)(-1);
 468       else
 469 # endif
 470       /* Use iconv() to parse multibyte characters.  */
 471       conv = iconv_open ("UTF-8", canon_charset);
 472
 473   if (conv != (iconv_t)(-1))
 474     weird_cjk = false;
 475   else
 476 #endif
 477     if (canon_charset == NULL)
 478       weird_cjk = false;
 479     else
 480       weird_cjk = po_is_charset_weird_cjk (canon_charset);
 481
 482   if (canon_charset == NULL)
 483     canon_charset = po_charset_ascii;
 484
 485   /* Loop over the '\n' delimited portions of value.  */
 486   s = value;
 487   first_line = true;
 488   do
 489     {
 490       /* The \a and \v escapes were added by the ANSI C Standard.
 491          Prior to the Standard, most compilers did not have them.
 492          Because we need the same program on all platforms we don't provide
 493          support for them here.  Thus we only support \b\f\n\r\t.  */
 494 #     define is_escape(c) \
 495        ((c) == '\b' || (c) == '\f' || (c) == '\n' || (c) == '\r' || (c) == '\t')
 496
 497       const char *es;
 498       const char *ep;
 499       size_t portion_len;
 500       char *portion;
 501       char *overrides;
 502       char *linebreaks;
 503       char *pp;
 504       char *op;
 505       int startcol, startcol_after_break, width;
 506       size_t i;
 507
 508       for (es = s; *es != '\0'; )
 509         if (*es++ == '\n')
 510           break;
 511
 512       /* Expand escape sequences in each portion.  */
 513       for (ep = s, portion_len = 0; ep < es; ep++)
 514         {
 515           char c = *ep;
 516           if (is_escape (c))
 517             portion_len += 2;
 518           else if (escape && !c_isprint ((unsigned char) c))
 519             portion_len += 4;
 520           else if (c == '\\' || c == '"')
 521             portion_len += 2;
 522           else
 523             {
 524 #if HAVE_ICONV
 525               if (conv != (iconv_t)(-1))
 526                 {
 527                   /* Skip over a complete multi-byte character.  Don't
 528                      interpret the second byte of a multi-byte character as
 529                      ASCII.  This is needed for the BIG5, BIG5-HKSCS, GBK,
 530                      GB18030, SHIFT_JIS, JOHAB encodings.  */
 531                   char scratchbuf[64];
 532                   const char *inptr = ep;
 533                   size_t insize;
 534                   char *outptr = &scratchbuf[0];
 535                   size_t outsize = sizeof (scratchbuf);
 536                   size_t res;
 537
 538                   res = (size_t)(-1);
 539                   for (insize = 1; inptr + insize <= es; insize++)
 540                     {
 541                       res = iconv (conv,
 542                                    (ICONV_CONST char **) &inptr, &insize,
 543                                    &outptr, &outsize);
 544                       if (!(res == (size_t)(-1) && errno == EINVAL))
 545                         break;
 546                       /* We expect that no input bytes have been consumed
 547                          so far.  */
 548                       if (inptr != ep)
 549                         abort ();
 550                     }
 551                   if (res == (size_t)(-1))
 552                     {
 553                       if (errno == EILSEQ)
 554                         {
 555                           po_error (0, 0, _("invalid multibyte sequence"));
 556                           continue;
 557                         }
 558                       else
 559                         abort ();
 560                     }
 561                   insize = inptr - ep;
 562                   portion_len += insize;
 563                   ep += insize - 1;
 564                 }
 565               else
 566 #endif
 567                 {
 568                   if (weird_cjk
 569                       /* Special handling of encodings with CJK structure.  */
 570                       && ep + 2 <= es
 571                       && (unsigned char) ep[0] >= 0x80
 572                       && (unsigned char) ep[1] >= 0x30)
 573                     {
 574                       portion_len += 2;
 575                       ep += 1;
 576                     }
 577                   else
 578                     portion_len += 1;
 579                 }
 580             }
 581         }
 582       portion = (char *) xmalloc (portion_len);
 583       overrides = (char *) xmalloc (portion_len);
 584       memset (overrides, UC_BREAK_UNDEFINED, portion_len);
 585       for (ep = s, pp = portion, op = overrides; ep < es; ep++)
 586         {
 587           char c = *ep;
 588           if (is_escape (c))
 589             {
 590               switch (c)
 591                 {
 592                 case '\b': c = 'b'; break;
 593                 case '\f': c = 'f'; break;
 594                 case '\n': c = 'n'; break;
 595                 case '\r': c = 'r'; break;
 596                 case '\t': c = 't'; break;
 597                 default: abort ();
 598                 }
 599               *pp++ = '\\';
 600               *pp++ = c;
 601               op++;
 602               *op++ = UC_BREAK_PROHIBITED;
 603               /* We warn about any use of escape sequences beside
 604                  '\n' and '\t'.  */
 605               if (c != 'n' && c != 't')
 606                 po_error (0, 0, _("\
 607 internationalized messages should not contain the `\\%c' escape sequence"),
 608                           c);
 609             }
 610           else if (escape && !c_isprint ((unsigned char) c))
 611             {
 612               *pp++ = '\\';
 613               *pp++ = '0' + (((unsigned char) c >> 6) & 7);
 614               *pp++ = '0' + (((unsigned char) c >> 3) & 7);
 615               *pp++ = '0' + ((unsigned char) c & 7);
 616               op++;
 617               *op++ = UC_BREAK_PROHIBITED;
 618               *op++ = UC_BREAK_PROHIBITED;
 619               *op++ = UC_BREAK_PROHIBITED;
 620             }
 621           else if (c == '\\' || c == '"')
 622             {
 623               *pp++ = '\\';
 624               *pp++ = c;
 625               op++;
 626               *op++ = UC_BREAK_PROHIBITED;
 627             }
 628           else
 629             {
 630 #if HAVE_ICONV
 631               if (conv != (iconv_t)(-1))
 632                 {
 633                   /* Copy a complete multi-byte character.  Don't
 634                      interpret the second byte of a multi-byte character as
 635                      ASCII.  This is needed for the BIG5, BIG5-HKSCS, GBK,
 636                      GB18030, SHIFT_JIS, JOHAB encodings.  */
 637                   char scratchbuf[64];
 638                   const char *inptr = ep;
 639                   size_t insize;
 640                   char *outptr = &scratchbuf[0];
 641                   size_t outsize = sizeof (scratchbuf);
 642                   size_t res;
 643
 644                   res = (size_t)(-1);
 645                   for (insize = 1; inptr + insize <= es; insize++)
 646                     {
 647                       res = iconv (conv,
 648                                    (ICONV_CONST char **) &inptr, &insize,
 649                                    &outptr, &outsize);
 650                       if (!(res == (size_t)(-1) && errno == EINVAL))
 651                         break;
 652                       /* We expect that no input bytes have been consumed
 653                          so far.  */
 654                       if (inptr != ep)
 655                         abort ();
 656                     }
 657                   if (res == (size_t)(-1))
 658                     {
 659                       if (errno == EILSEQ)
 660                         {
 661                           po_error (0, 0, _("invalid multibyte sequence"));
 662                           continue;
 663                         }
 664                       else
 665                         abort ();
 666                     }
 667                   insize = inptr - ep;
 668                   memcpy_small (pp, ep, insize);
 669                   pp += insize;
 670                   op += insize;
 671                   ep += insize - 1;
 672                 }
 673               else
 674 #endif
 675                 {
 676                   if (weird_cjk
 677                       /* Special handling of encodings with CJK structure.  */
 678                       && ep + 2 <= es
 679                       && (unsigned char) c >= 0x80
 680                       && (unsigned char) ep[1] >= 0x30)
 681                     {
 682                       *pp++ = c;
 683                       ep += 1;
 684                       *pp++ = *ep;
 685                       op += 2;
 686                     }
 687                   else
 688                     {
 689                       *pp++ = c;
 690                       op++;
 691                     }
 692                 }
 693             }
 694         }
 695
 696       /* Don't break immediately before the "\n" at the end.  */
 697       if (es > s && es[-1] == '\n')
 698         overrides[portion_len - 2] = UC_BREAK_PROHIBITED;
 699
 700       linebreaks = (char *) xmalloc (portion_len);
 701
 702       /* Subsequent lines after a break are all indented.
 703          See INDENT-S.  */
 704       startcol_after_break = (line_prefix ? strlen (line_prefix) : 0);
 705       if (indent)
 706         startcol_after_break = (startcol_after_break + 8) & ~7;
 707       startcol_after_break++;
 708
 709       /* The line width.  Allow room for the closing quote character.  */
 710       width = (wrap_strings && do_wrap != no ? page_width : INT_MAX) - 1;
 711       /* Adjust for indentation of subsequent lines.  */
 712       width -= startcol_after_break;
 713
 714     recompute:
 715       /* The line starts with different things depending on whether it
 716          is the first line, and if we are using the indented style.
 717          See INDENT-F.  */
 718       startcol = (line_prefix ? strlen (line_prefix) : 0);
 719       if (first_line)
 720         {
 721           startcol += strlen (name);
 722           if (indent)
 723             startcol = (startcol + 8) & ~7;
 724           else
 725             startcol++;
 726         }
 727       else
 728         {
 729           if (indent)
 730             startcol = (startcol + 8) & ~7;
 731         }
 732       /* Allow room for the opening quote character.  */
 733       startcol++;
 734       /* Adjust for indentation of subsequent lines.  */
 735       startcol -= startcol_after_break;
 736
 737       /* Do line breaking on the portion.  */
 738       mbs_width_linebreaks (portion, portion_len, width, startcol, 0,
 739                             overrides, canon_charset, linebreaks);
 740
 741       /* If this is the first line, and we are not using the indented
 742          style, and the line would wrap, then use an empty first line
 743          and restart.  */
 744       if (first_line && !indent
 745           && portion_len > 0
 746           && (*es != '\0'
 747               || startcol > width
 748               || memchr (linebreaks, UC_BREAK_POSSIBLE, portion_len) != NULL))
 749         {
 750           if (line_prefix != NULL)
 751             fputs (line_prefix, fp);
 752           fputs (name, fp);
 753           fputs (" \"\"\n", fp);
 754           first_line = false;
 755           /* Recompute startcol and linebreaks.  */
 756           goto recompute;
 757         }
 758
 759       /* Print the beginning of the line.  This will depend on whether
 760          this is the first line, and if the indented style is being
 761          used.  INDENT-F.  */
 762       if (line_prefix != NULL)
 763         fputs (line_prefix, fp);
 764       if (first_line)
 765         {
 766           fputs (name, fp);
 767           putc (indent ? '\t' : ' ', fp);
 768           first_line = false;
 769         }
 770       else
 771         {
 772           if (indent)
 773             putc ('\t', fp);
 774         }
 775
 776       /* Print the portion itself, with linebreaks where necessary.  */
 777       putc ('"', fp);
 778       for (i = 0; i < portion_len; i++)
 779         {
 780           if (linebreaks[i] == UC_BREAK_POSSIBLE)
 781             {
 782               fputs ("\"\n", fp);
 783               /* INDENT-S.  */
 784               if (line_prefix != NULL)
 785                 fputs (line_prefix, fp);
 786               if (indent)
 787                 putc ('\t', fp);
 788               putc ('"', fp);
 789             }
 790           putc (portion[i], fp);
 791         }
 792       fputs ("\"\n", fp);
 793
 794       free (linebreaks);
 795       free (overrides);
 796       free (portion);
 797
 798       s = es;
 799 #     undef is_escape
 800     }
 801   while (*s);
 802
 803 #if HAVE_ICONV
 804   if (conv != (iconv_t)(-1))
 805     iconv_close (conv);
 806 #endif
 807 }
 808
 809
 810 static void
 811 print_blank_line (FILE *fp)
 812 {
 813   if (uniforum)
 814     fputs ("#\n", fp);
 815   else
 816     putc ('\n', fp);
 817 }
 818
 819
 820 static void
 821 message_print (const message_ty *mp, FILE *fp, const char *charset,
 822                bool blank_line, bool debug)
 823 {
 824   /* Separate messages with a blank line.  Uniforum doesn't like blank
 825      lines, so use an empty comment (unless there already is one).  */
 826   if (blank_line && (!uniforum
 827                      || mp->comment == NULL
 828                      || mp->comment->nitems == 0
 829                      || mp->comment->item[0][0] != '\0'))
 830     print_blank_line (fp);
 831
 832   /* Print translator comment if available.  */
 833   message_print_comment (mp, fp);
 834
 835   /* Print xgettext extracted comments.  */
 836   message_print_comment_dot (mp, fp);
 837
 838   /* Print the file position comments.  This will help a human who is
 839      trying to navigate the sources.  There is no problem of getting
 840      repeated positions, because duplicates are checked for.  */
 841   message_print_comment_filepos (mp, fp, uniforum, page_width);
 842
 843   /* Print flag information in special comment.  */
 844   message_print_comment_flags (mp, fp, debug);
 845
 846   /* Print each of the message components.  Wrap them nicely so they
 847      are as readable as possible.  If there is no recorded msgstr for
 848      this domain, emit an empty string.  */
 849   if (!is_ascii_string (mp->msgid)
 850       && po_charset_canonicalize (charset) != po_charset_utf8)
 851     po_multiline_warning (xasprintf (_("warning: ")),
 852                           xasprintf (_("\
 853 The following msgid contains non-ASCII characters.\n\
 854 This will cause problems to translators who use a character encoding\n\
 855 different from yours. Consider using a pure ASCII msgid instead.\n\
 856 %s\n"), mp->msgid));
 857   wrap (fp, NULL, "msgid", mp->msgid, mp->do_wrap, charset);
 858   if (mp->msgid_plural != NULL)
 859     wrap (fp, NULL, "msgid_plural", mp->msgid_plural, mp->do_wrap, charset);
 860
 861   if (mp->msgid_plural == NULL)
 862     wrap (fp, NULL, "msgstr", mp->msgstr, mp->do_wrap, charset);
 863   else
 864     {
 865       char prefix_buf[20];
 866       unsigned int i;
 867       const char *p;
 868
 869       for (p = mp->msgstr, i = 0;
 870            p < mp->msgstr + mp->msgstr_len;
 871            p += strlen (p) + 1, i++)
 872         {
 873           sprintf (prefix_buf, "msgstr[%u]", i);
 874           wrap (fp, NULL, prefix_buf, p, mp->do_wrap, charset);
 875         }
 876     }
 877 }
 878
 879
 880 static void
 881 message_print_obsolete (const message_ty *mp, FILE *fp, const char *charset,
 882                         bool blank_line)
 883 {
 884   /* If msgstr is the empty string we print nothing.  */
 885   if (mp->msgstr[0] == '\0')
 886     return;
 887
 888   /* Separate messages with a blank line.  Uniforum doesn't like blank
 889      lines, so use an empty comment (unless there already is one).  */
 890   if (blank_line)
 891     print_blank_line (fp);
 892
 893   /* Print translator comment if available.  */
 894   message_print_comment (mp, fp);
 895
 896   /* Print flag information in special comment.  */
 897   if (mp->is_fuzzy)
 898     {
 899       bool first = true;
 900
 901       putc ('#', fp);
 902       putc (',', fp);
 903
 904       if (mp->is_fuzzy)
 905         {
 906           fputs (" fuzzy", fp);
 907           first = false;
 908         }
 909
 910       putc ('\n', fp);
 911     }
 912
 913   /* Print each of the message components.  Wrap them nicely so they
 914      are as readable as possible.  */
 915   if (!is_ascii_string (mp->msgid)
 916       && po_charset_canonicalize (charset) != po_charset_utf8)
 917     po_multiline_warning (xasprintf (_("warning: ")),
 918                           xasprintf (_("\
 919 The following msgid contains non-ASCII characters.\n\
 920 This will cause problems to translators who use a character encoding\n\
 921 different from yours. Consider using a pure ASCII msgid instead.\n\
 922 %s\n"), mp->msgid));
 923   wrap (fp, "#~ ", "msgid", mp->msgid, mp->do_wrap, charset);
 924   if (mp->msgid_plural != NULL)
 925     wrap (fp, "#~ ", "msgid_plural", mp->msgid_plural, mp->do_wrap, charset);
 926
 927   if (mp->msgid_plural == NULL)
 928     wrap (fp, "#~ ", "msgstr", mp->msgstr, mp->do_wrap, charset);
 929   else
 930     {
 931       char prefix_buf[20];
 932       unsigned int i;
 933       const char *p;
 934
 935       for (p = mp->msgstr, i = 0;
 936            p < mp->msgstr + mp->msgstr_len;
 937            p += strlen (p) + 1, i++)
 938         {
 939           sprintf (prefix_buf, "msgstr[%u]", i);
 940           wrap (fp, "#~ ", prefix_buf, p, mp->do_wrap, charset);
 941         }
 942     }
 943 }
 944
 945
 946 static void
 947 msgdomain_list_print_po (msgdomain_list_ty *mdlp, FILE *fp, bool debug)
 948 {
 949   size_t j, k;
 950   bool blank_line;
 951
 952   /* Write out the messages for each domain.  */
 953   blank_line = false;
 954   for (k = 0; k < mdlp->nitems; k++)
 955     {
 956       message_list_ty *mlp;
 957       const char *header;
 958       char *charset;
 959       char *allocated_charset;
 960
 961       /* If the first domain is the default, don't bother emitting
 962          the domain name, because it is the default.  */
 963       if (!(k == 0
 964             && strcmp (mdlp->item[k]->domain, MESSAGE_DOMAIN_DEFAULT) == 0))
 965         {
 966           if (blank_line)
 967             print_blank_line (fp);
 968           fprintf (fp, "domain \"%s\"\n", mdlp->item[k]->domain);
 969           blank_line = true;
 970         }
 971
 972       mlp = mdlp->item[k]->messages;
 973
 974       /* Search the header entry.  */
 975       header = NULL;
 976       for (j = 0; j < mlp->nitems; ++j)
 977         if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
 978           {
 979             header = mlp->item[j]->msgstr;
 980             break;
 981           }
 982
 983       /* Extract the charset name.  */
 984       charset = "ASCII";
 985       allocated_charset = NULL;
 986       if (header != NULL)
 987         {
 988           const char *charsetstr = strstr (header, "charset=");
 989
 990           if (charsetstr != NULL)
 991             {
 992               size_t len;
 993
 994               charsetstr += strlen ("charset=");
 995               len = strcspn (charsetstr, " \t\n");
 996               charset = allocated_charset = (char *) xallocsa (len + 1);
 997               memcpy (charset, charsetstr, len);
 998               charset[len] = '\0';
 999
1000               /* Treat the dummy default value as if it were absent.  */
1001               if (strcmp (charset, "CHARSET") == 0)
1002                 charset = "ASCII";
1003             }
1004         }
1005
1006       /* Write out each of the messages for this domain.  */
1007       for (j = 0; j < mlp->nitems; ++j)
1008         if (!mlp->item[j]->obsolete)
1009           {
1010             message_print (mlp->item[j], fp, charset, blank_line, debug);
1011             blank_line = true;
1012           }
1013
1014       /* Write out each of the obsolete messages for this domain.  */
1015       for (j = 0; j < mlp->nitems; ++j)
1016         if (mlp->item[j]->obsolete)
1017           {
1018             message_print_obsolete (mlp->item[j], fp, charset, blank_line);
1019             blank_line = true;
1020           }
1021
1022       if (allocated_charset != NULL)
1023         freesa (allocated_charset);
1024     }
1025 }
1026
1027
1028 void
1029 msgdomain_list_print (msgdomain_list_ty *mdlp, const char *filename,
1030                       bool force, bool debug)
1031 {
1032   FILE *fp;
1033
1034   /* We will not write anything if, for every domain, we have no message
1035      or only the header entry.  */
1036   if (!force)
1037     {
1038       bool found_nonempty = false;
1039       size_t k;
1040
1041       for (k = 0; k < mdlp->nitems; k++)
1042         {
1043           message_list_ty *mlp = mdlp->item[k]->messages;
1044
1045           if (!(mlp->nitems == 0
1046                 || (mlp->nitems == 1 && mlp->item[0]->msgid[0] == '\0')))
1047             {
1048               found_nonempty = true;
1049               break;
1050             }
1051         }
1052
1053       if (!found_nonempty)
1054         return;
1055     }
1056
1057   /* Check whether the output format can accomodate all messages.  */
1058   if (use_syntax_properties || use_syntax_stringtable)
1059     {
1060       if (mdlp->nitems > 1)
1061         {
1062           if (use_syntax_properties)
1063             po_error (EXIT_FAILURE, 0, _("Cannot output multiple translation domains into a single file with Java .properties syntax. Try using PO file syntax instead."));
1064           if (use_syntax_stringtable)
1065             po_error (EXIT_FAILURE, 0, _("Cannot output multiple translation domains into a single file with NeXTstep/GNUstep .strings syntax."));
1066         }
1067       if (mdlp->nitems == 1)
1068         {
1069           message_list_ty *mlp = mdlp->item[0]->messages;
1070           const lex_pos_ty *has_plural;
1071           size_t j;
1072
1073           has_plural = NULL;
1074           for (j = 0; j < mlp->nitems; j++)
1075             {
1076               message_ty *mp = mlp->item[j];
1077
1078               if (mp->msgid_plural != NULL)
1079                 {
1080                   has_plural = &mp->pos;
1081                   break;
1082                 }
1083             }
1084
1085           if (has_plural != NULL)
1086             {
1087               error_with_progname = false;
1088               if (use_syntax_properties)
1089                 po_error_at_line (EXIT_FAILURE, 0,
1090                                   has_plural->file_name, has_plural->line_number,
1091                                   _("message catalog has plural form translations, but the output format does not support them. Try generating a Java class using \"msgfmt --java\", instead of a properties file."));
1092               if (use_syntax_stringtable)
1093                 po_error_at_line (EXIT_FAILURE, 0,
1094                                   has_plural->file_name, has_plural->line_number,
1095                                   _("message catalog has plural form translations, but the output format does not support them."));
1096               error_with_progname = true;
1097             }
1098         }
1099     }
1100
1101   /* Open the output file.  */
1102   if (filename != NULL && strcmp (filename, "-") != 0
1103       && strcmp (filename, "/dev/stdout") != 0)
1104     {
1105       fp = fopen (filename, "w");
1106       if (fp == NULL)
1107         po_error (EXIT_FAILURE, errno, _("cannot create output file \"%s\""),
1108                   filename);
1109     }
1110   else
1111     {
1112       fp = stdout;
1113       /* xgettext:no-c-format */
1114       filename = _("standard output");
1115     }
1116
1117   if (use_syntax_properties)
1118     msgdomain_list_print_properties (mdlp, fp, page_width, debug);
1119   else if (use_syntax_stringtable)
1120     msgdomain_list_print_stringtable (mdlp, fp, page_width, debug);
1121   else
1122     msgdomain_list_print_po (mdlp, fp, debug);
1123
1124   /* Make sure nothing went wrong.  */
1125   if (fwriteerror (fp))
1126     po_error (EXIT_FAILURE, errno, _("error while writing \"%s\" file"),
1127               filename);
1128 }
1129
1130
1131 /* =============================== Sorting. ================================ */
1132
1133
1134 static int
1135 cmp_by_msgid (const void *va, const void *vb)
1136 {
1137   const message_ty *a = *(const message_ty **) va;
1138   const message_ty *b = *(const message_ty **) vb;
1139   /* Because msgids normally contain only ASCII characters, it is OK to
1140      sort them as if we were in the C locale. And strcoll() in the C locale
1141      is the same as strcmp().  */
1142   return strcmp (a->msgid, b->msgid);
1143 }
1144
1145
1146 void
1147 msgdomain_list_sort_by_msgid (msgdomain_list_ty *mdlp)
1148 {
1149   size_t k;
1150
1151   for (k = 0; k < mdlp->nitems; k++)
1152     {
1153       message_list_ty *mlp = mdlp->item[k]->messages;
1154
1155       if (mlp->nitems > 0)
1156         qsort (mlp->item, mlp->nitems, sizeof (mlp->item[0]), cmp_by_msgid);
1157     }
1158 }
1159
1160
1161 /* Sort the file positions of every message.  */
1162
1163 static int
1164 cmp_filepos (const void *va, const void *vb)
1165 {
1166   const lex_pos_ty *a = (const lex_pos_ty *) va;
1167   const lex_pos_ty *b = (const lex_pos_ty *) vb;
1168   int cmp;
1169
1170   cmp = strcmp (a->file_name, b->file_name);
1171   if (cmp == 0)
1172     cmp = (int) a->line_number - (int) b->line_number;
1173
1174   return cmp;
1175 }
1176
1177 static void
1178 msgdomain_list_sort_filepos (msgdomain_list_ty *mdlp)
1179 {
1180   size_t j, k;
1181
1182   for (k = 0; k < mdlp->nitems; k++)
1183     {
1184       message_list_ty *mlp = mdlp->item[k]->messages;
1185
1186       for (j = 0; j < mlp->nitems; j++)
1187         {
1188           message_ty *mp = mlp->item[j];
1189
1190           if (mp->filepos_count > 0)
1191             qsort (mp->filepos, mp->filepos_count, sizeof (mp->filepos[0]),
1192                    cmp_filepos);
1193         }
1194     }
1195 }
1196
1197
1198 /* Sort the messages according to the file position.  */
1199
1200 static int
1201 cmp_by_filepos (const void *va, const void *vb)
1202 {
1203   const message_ty *a = *(const message_ty **) va;
1204   const message_ty *b = *(const message_ty **) vb;
1205   int cmp;
1206
1207   /* No filepos is smaller than any other filepos.  */
1208   if (a->filepos_count == 0)
1209     {
1210       if (b->filepos_count != 0)
1211         return -1;
1212     }
1213   if (b->filepos_count == 0)
1214     return 1;
1215
1216   /* Compare on the file names...  */
1217   cmp = strcmp (a->filepos[0].file_name, b->filepos[0].file_name);
1218   if (cmp != 0)
1219     return cmp;
1220
1221   /* If they are equal, compare on the line numbers...  */
1222   cmp = a->filepos[0].line_number - b->filepos[0].line_number;
1223   if (cmp != 0)
1224     return cmp;
1225
1226   /* If they are equal, compare on the msgid strings.  */
1227   /* Because msgids normally contain only ASCII characters, it is OK to
1228      sort them as if we were in the C locale. And strcoll() in the C locale
1229      is the same as strcmp().  */
1230   return strcmp (a->msgid, b->msgid);
1231 }
1232
1233
1234 void
1235 msgdomain_list_sort_by_filepos (msgdomain_list_ty *mdlp)
1236 {
1237   size_t k;
1238
1239   /* It makes sense to compare filepos[0] of different messages only after
1240      the filepos[] array of each message has been sorted.  Sort it now.  */
1241   msgdomain_list_sort_filepos (mdlp);
1242
1243   for (k = 0; k < mdlp->nitems; k++)
1244     {
1245       message_list_ty *mlp = mdlp->item[k]->messages;
1246
1247       if (mlp->nitems > 0)
1248         qsort (mlp->item, mlp->nitems, sizeof (mlp->item[0]), cmp_by_filepos);
1249     }
1250 }