gnu/dist/gettext/gettext-tools/src/xgettext.c

   1 /* Extracts strings from C source file to Uniforum style .po file.
   2    Copyright (C) 1995-1998, 2000-2005 Free Software Foundation, Inc.
   3    Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 2, or (at your option)
   8    any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include <config.h>
  21 #endif
  22 #include <alloca.h>
  23
  24 #include <ctype.h>
  25 #include <errno.h>
  26 #include <getopt.h>
  27 #include <stdio.h>
  28 #include <time.h>
  29 #include <stdlib.h>
  30 #include <stdbool.h>
  31 #include <string.h>
  32 #include <locale.h>
  33 #include <limits.h>
  34
  35 #include "xgettext.h"
  36 #include "closeout.h"
  37 #include "dir-list.h"
  38 #include "file-list.h"
  39 #include "str-list.h"
  40 #include "error.h"
  41 #include "error-progname.h"
  42 #include "progname.h"
  43 #include "relocatable.h"
  44 #include "basename.h"
  45 #include "xerror.h"
  46 #include "xalloc.h"
  47 #include "xallocsa.h"
  48 #include "strstr.h"
  49 #include "xerror.h"
  50 #include "exit.h"
  51 #include "pathname.h"
  52 #include "c-strcase.h"
  53 #include "stpcpy.h"
  54 #include "open-po.h"
  55 #include "read-po-abstract.h"
  56 #include "message.h"
  57 #include "po-charset.h"
  58 #include "msgl-iconv.h"
  59 #include "msgl-ascii.h"
  60 #include "po-time.h"
  61 #include "write-po.h"
  62 #include "format.h"
  63 #include "gettext.h"
  64
  65 /* A convenience macro.  I don't like writing gettext() every time.  */
  66 #define _(str) gettext (str)
  67
  68
  69 #ifdef __cplusplus
  70 extern "C" {
  71 #endif
  72
  73 #include "x-c.h"
  74 #include "x-po.h"
  75 #include "x-sh.h"
  76 #include "x-python.h"
  77 #include "x-lisp.h"
  78 #include "x-elisp.h"
  79 #include "x-librep.h"
  80 #include "x-scheme.h"
  81 #include "x-smalltalk.h"
  82 #include "x-java.h"
  83 #include "x-properties.h"
  84 #include "x-csharp.h"
  85 #include "x-awk.h"
  86 #include "x-ycp.h"
  87 #include "x-tcl.h"
  88 #include "x-perl.h"
  89 #include "x-php.h"
  90 #include "x-stringtable.h"
  91 #include "x-rst.h"
  92 #include "x-glade.h"
  93
  94 #ifdef __cplusplus
  95 }
  96 #endif
  97
  98
  99 /* If nonzero add all comments immediately preceding one of the keywords. */
 100 static bool add_all_comments = false;
 101
 102 /* Tag used in comment of prevailing domain.  */
 103 static char *comment_tag;
 104
 105 /* Name of default domain file.  If not set defaults to messages.po.  */
 106 static const char *default_domain;
 107
 108 /* If called with --debug option the output reflects whether format
 109    string recognition is done automatically or forced by the user.  */
 110 static int do_debug;
 111
 112 /* Content of .po files with symbols to be excluded.  */
 113 message_list_ty *exclude;
 114
 115 /* Force output of PO file even if empty.  */
 116 static int force_po;
 117
 118 /* Copyright holder of the output file and the translations.  */
 119 static const char *copyright_holder = "THE PACKAGE'S COPYRIGHT HOLDER";
 120
 121 /* Email address or URL for reports of bugs in msgids.  */
 122 static const char *msgid_bugs_address = NULL;
 123
 124 /* String used as prefix for msgstr.  */
 125 static const char *msgstr_prefix;
 126
 127 /* String used as suffix for msgstr.  */
 128 static const char *msgstr_suffix;
 129
 130 /* Directory in which output files are created.  */
 131 static char *output_dir;
 132
 133 /* The output syntax: .pot or .properties or .strings.  */
 134 static input_syntax_ty output_syntax = syntax_po;
 135
 136 /* If nonzero omit header with information about this run.  */
 137 int xgettext_omit_header;
 138
 139 /* Table of flag_context_list_ty tables.  */
 140 static flag_context_list_table_ty flag_table_c;
 141 static flag_context_list_table_ty flag_table_objc;
 142 static flag_context_list_table_ty flag_table_gcc_internal;
 143 static flag_context_list_table_ty flag_table_sh;
 144 static flag_context_list_table_ty flag_table_python;
 145 static flag_context_list_table_ty flag_table_lisp;
 146 static flag_context_list_table_ty flag_table_elisp;
 147 static flag_context_list_table_ty flag_table_librep;
 148 static flag_context_list_table_ty flag_table_scheme;
 149 static flag_context_list_table_ty flag_table_java;
 150 static flag_context_list_table_ty flag_table_csharp;
 151 static flag_context_list_table_ty flag_table_awk;
 152 static flag_context_list_table_ty flag_table_ycp;
 153 static flag_context_list_table_ty flag_table_tcl;
 154 static flag_context_list_table_ty flag_table_perl;
 155 static flag_context_list_table_ty flag_table_php;
 156
 157 /* If true, recognize Qt format strings.  */
 158 static bool recognize_format_qt;
 159
 160 /* Canonicalized encoding name for all input files.  */
 161 const char *xgettext_global_source_encoding;
 162
 163 #if HAVE_ICONV
 164 /* Converter from xgettext_global_source_encoding to UTF-8 (except from
 165    ASCII or UTF-8, when this conversion is a no-op).  */
 166 iconv_t xgettext_global_source_iconv;
 167 #endif
 168
 169 /* Canonicalized encoding name for the current input file.  */
 170 const char *xgettext_current_source_encoding;
 171
 172 #if HAVE_ICONV
 173 /* Converter from xgettext_current_source_encoding to UTF-8 (except from
 174    ASCII or UTF-8, when this conversion is a no-op).  */
 175 iconv_t xgettext_current_source_iconv;
 176 #endif
 177
 178 /* Long options.  */
 179 static const struct option long_options[] =
 180 {
 181   { "add-comments", optional_argument, NULL, 'c' },
 182   { "add-location", no_argument, &line_comment, 1 },
 183   { "c++", no_argument, NULL, 'C' },
 184   { "copyright-holder", required_argument, NULL, CHAR_MAX + 1 },
 185   { "debug", no_argument, &do_debug, 1 },
 186   { "default-domain", required_argument, NULL, 'd' },
 187   { "directory", required_argument, NULL, 'D' },
 188   { "escape", no_argument, NULL, 'E' },
 189   { "exclude-file", required_argument, NULL, 'x' },
 190   { "extract-all", no_argument, NULL, 'a' },
 191   { "files-from", required_argument, NULL, 'f' },
 192   { "flag", required_argument, NULL, CHAR_MAX + 8 },
 193   { "force-po", no_argument, &force_po, 1 },
 194   { "foreign-user", no_argument, NULL, CHAR_MAX + 2 },
 195   { "from-code", required_argument, NULL, CHAR_MAX + 3 },
 196   { "help", no_argument, NULL, 'h' },
 197   { "indent", no_argument, NULL, 'i' },
 198   { "join-existing", no_argument, NULL, 'j' },
 199   { "keyword", optional_argument, NULL, 'k' },
 200   { "language", required_argument, NULL, 'L' },
 201   { "msgid-bugs-address", required_argument, NULL, CHAR_MAX + 5 },
 202   { "msgstr-prefix", optional_argument, NULL, 'm' },
 203   { "msgstr-suffix", optional_argument, NULL, 'M' },
 204   { "no-escape", no_argument, NULL, 'e' },
 205   { "no-location", no_argument, &line_comment, 0 },
 206   { "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
 207   { "omit-header", no_argument, &xgettext_omit_header, 1 },
 208   { "output", required_argument, NULL, 'o' },
 209   { "output-dir", required_argument, NULL, 'p' },
 210   { "properties-output", no_argument, NULL, CHAR_MAX + 6 },
 211   { "qt", no_argument, NULL, CHAR_MAX + 9 },
 212   { "sort-by-file", no_argument, NULL, 'F' },
 213   { "sort-output", no_argument, NULL, 's' },
 214   { "strict", no_argument, NULL, 'S' },
 215   { "string-limit", required_argument, NULL, 'l' },
 216   { "stringtable-output", no_argument, NULL, CHAR_MAX + 7 },
 217   { "trigraphs", no_argument, NULL, 'T' },
 218   { "version", no_argument, NULL, 'V' },
 219   { "width", required_argument, NULL, 'w', },
 220   { NULL, 0, NULL, 0 }
 221 };
 222
 223
 224 /* The extractors must all be functions returning void and taking three
 225    arguments designating the input stream and one message domain list argument
 226    in which to add the messages.  */
 227 typedef void (*extractor_func) (FILE *fp, const char *real_filename,
 228                                 const char *logical_filename,
 229                                 flag_context_list_table_ty *flag_table,
 230                                 msgdomain_list_ty *mdlp);
 231
 232 typedef struct extractor_ty extractor_ty;
 233 struct extractor_ty
 234 {
 235   extractor_func func;
 236   flag_context_list_table_ty *flag_table;
 237   struct formatstring_parser *formatstring_parser1;
 238   struct formatstring_parser *formatstring_parser2;
 239 };
 240
 241
 242 /* Forward declaration of local functions.  */
 243 static void usage (int status)
 244 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ > 4) || __GNUC__ > 2)
 245         __attribute__ ((noreturn))
 246 #endif
 247 ;
 248 static void read_exclusion_file (char *file_name);
 249 static void extract_from_file (const char *file_name, extractor_ty extractor,
 250                                msgdomain_list_ty *mdlp);
 251 static message_ty *construct_header (void);
 252 static void finalize_header (msgdomain_list_ty *mdlp);
 253 static extractor_ty language_to_extractor (const char *name);
 254 static const char *extension_to_language (const char *extension);
 255
 256
 257 int
 258 main (int argc, char *argv[])
 259 {
 260   int cnt;
 261   int optchar;
 262   bool do_help = false;
 263   bool do_version = false;
 264   msgdomain_list_ty *mdlp;
 265   bool join_existing = false;
 266   bool no_default_keywords = false;
 267   bool some_additional_keywords = false;
 268   bool sort_by_msgid = false;
 269   bool sort_by_filepos = false;
 270   const char *file_name;
 271   const char *files_from = NULL;
 272   string_list_ty *file_list;
 273   char *output_file = NULL;
 274   const char *language = NULL;
 275   extractor_ty extractor = { NULL, NULL, NULL, NULL };
 276
 277   /* Set program name for messages.  */
 278   set_program_name (argv[0]);
 279   error_print_progname = maybe_print_progname;
 280
 281 #ifdef HAVE_SETLOCALE
 282   /* Set locale via LC_ALL.  */
 283   setlocale (LC_ALL, "");
 284 #endif
 285
 286   /* Set the text message domain.  */
 287   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
 288   textdomain (PACKAGE);
 289
 290   /* Ensure that write errors on stdout are detected.  */
 291   atexit (close_stdout);
 292
 293   /* Set initial value of variables.  */
 294   default_domain = MESSAGE_DOMAIN_DEFAULT;
 295   xgettext_global_source_encoding = po_charset_ascii;
 296   init_flag_table_c ();
 297   init_flag_table_objc ();
 298   init_flag_table_gcc_internal ();
 299   init_flag_table_sh ();
 300   init_flag_table_python ();
 301   init_flag_table_lisp ();
 302   init_flag_table_elisp ();
 303   init_flag_table_librep ();
 304   init_flag_table_scheme ();
 305   init_flag_table_java ();
 306   init_flag_table_csharp ();
 307   init_flag_table_awk ();
 308   init_flag_table_ycp ();
 309   init_flag_table_tcl ();
 310   init_flag_table_perl ();
 311   init_flag_table_php ();
 312
 313   while ((optchar = getopt_long (argc, argv,
 314                                  "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:x:",
 315                                  long_options, NULL)) != EOF)
 316     switch (optchar)
 317       {
 318       case '\0':                /* Long option.  */
 319         break;
 320       case 'a':
 321         x_c_extract_all ();
 322         x_sh_extract_all ();
 323         x_python_extract_all ();
 324         x_lisp_extract_all ();
 325         x_elisp_extract_all ();
 326         x_librep_extract_all ();
 327         x_scheme_extract_all ();
 328         x_java_extract_all ();
 329         x_csharp_extract_all ();
 330         x_awk_extract_all ();
 331         x_tcl_extract_all ();
 332         x_perl_extract_all ();
 333         x_php_extract_all ();
 334         x_glade_extract_all ();
 335         break;
 336       case 'c':
 337         if (optarg == NULL)
 338           {
 339             add_all_comments = true;
 340             comment_tag = NULL;
 341           }
 342         else
 343           {
 344             add_all_comments = false;
 345             comment_tag = optarg;
 346             /* We ignore leading white space.  */
 347             while (isspace ((unsigned char) *comment_tag))
 348               ++comment_tag;
 349           }
 350         break;
 351       case 'C':
 352         language = "C++";
 353         break;
 354       case 'd':
 355         default_domain = optarg;
 356         break;
 357       case 'D':
 358         dir_list_append (optarg);
 359         break;
 360       case 'e':
 361         message_print_style_escape (false);
 362         break;
 363       case 'E':
 364         message_print_style_escape (true);
 365         break;
 366       case 'f':
 367         files_from = optarg;
 368         break;
 369       case 'F':
 370         sort_by_filepos = true;
 371         break;
 372       case 'h':
 373         do_help = true;
 374         break;
 375       case 'i':
 376         message_print_style_indent ();
 377         break;
 378       case 'j':
 379         join_existing = true;
 380         break;
 381       case 'k':
 382         if (optarg == NULL || *optarg != '\0')
 383           {
 384             x_c_keyword (optarg);
 385             x_objc_keyword (optarg);
 386             x_sh_keyword (optarg);
 387             x_python_keyword (optarg);
 388             x_lisp_keyword (optarg);
 389             x_elisp_keyword (optarg);
 390             x_librep_keyword (optarg);
 391             x_scheme_keyword (optarg);
 392             x_java_keyword (optarg);
 393             x_csharp_keyword (optarg);
 394             x_awk_keyword (optarg);
 395             x_tcl_keyword (optarg);
 396             x_perl_keyword (optarg);
 397             x_php_keyword (optarg);
 398             x_glade_keyword (optarg);
 399             if (optarg == NULL)
 400               no_default_keywords = true;
 401             else
 402               some_additional_keywords = true;
 403           }
 404         break;
 405       case 'l':
 406         /* Accepted for backward compatibility with 0.10.35.  */
 407         break;
 408       case 'L':
 409         language = optarg;
 410         break;
 411       case 'm':
 412         /* -m takes an optional argument.  If none is given "" is assumed. */
 413         msgstr_prefix = optarg == NULL ? "" : optarg;
 414         break;
 415       case 'M':
 416         /* -M takes an optional argument.  If none is given "" is assumed. */
 417         msgstr_suffix = optarg == NULL ? "" : optarg;
 418         break;
 419       case 'n':
 420         line_comment = 1;
 421         break;
 422       case 'o':
 423         output_file = optarg;
 424         break;
 425       case 'p':
 426         {
 427           size_t len = strlen (optarg);
 428
 429           if (output_dir != NULL)
 430             free (output_dir);
 431
 432           if (optarg[len - 1] == '/')
 433             output_dir = xstrdup (optarg);
 434           else
 435             output_dir = xasprintf ("%s/", optarg);
 436         }
 437         break;
 438       case 's':
 439         sort_by_msgid = true;
 440         break;
 441       case 'S':
 442         message_print_style_uniforum ();
 443         break;
 444       case 'T':
 445         x_c_trigraphs ();
 446         break;
 447       case 'V':
 448         do_version = true;
 449         break;
 450       case 'w':
 451         {
 452           int value;
 453           char *endp;
 454           value = strtol (optarg, &endp, 10);
 455           if (endp != optarg)
 456             message_page_width_set (value);
 457         }
 458         break;
 459       case 'x':
 460         read_exclusion_file (optarg);
 461         break;
 462       case CHAR_MAX + 1:        /* --copyright-holder */
 463         copyright_holder = optarg;
 464         break;
 465       case CHAR_MAX + 2:        /* --foreign-user */
 466         copyright_holder = "";
 467         break;
 468       case CHAR_MAX + 3:        /* --from-code */
 469         xgettext_global_source_encoding = po_charset_canonicalize (optarg);
 470         if (xgettext_global_source_encoding == NULL)
 471           xgettext_global_source_encoding = po_charset_ascii;
 472         break;
 473       case CHAR_MAX + 4:        /* --no-wrap */
 474         message_page_width_ignore ();
 475         break;
 476       case CHAR_MAX + 5:        /* --msgid-bugs-address */
 477         msgid_bugs_address = optarg;
 478         break;
 479       case CHAR_MAX + 6:        /* --properties-output */
 480         message_print_syntax_properties ();
 481         output_syntax = syntax_properties;
 482         break;
 483       case CHAR_MAX + 7:        /* --stringtable-output */
 484         message_print_syntax_stringtable ();
 485         output_syntax = syntax_stringtable;
 486         break;
 487       case CHAR_MAX + 8:        /* --flag */
 488         xgettext_record_flag (optarg);
 489         break;
 490       case CHAR_MAX + 9:        /* --qt */
 491         recognize_format_qt = true;
 492         break;
 493       default:
 494         usage (EXIT_FAILURE);
 495         /* NOTREACHED */
 496       }
 497
 498   /* Version information requested.  */
 499   if (do_version)
 500     {
 501       printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
 502       /* xgettext: no-wrap */
 503       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
 504 This is free software; see the source for copying conditions.  There is NO\n\
 505 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
 506 "),
 507               "1995-1998, 2000-2005");
 508       printf (_("Written by %s.\n"), "Ulrich Drepper");
 509       exit (EXIT_SUCCESS);
 510     }
 511
 512   /* Help is requested.  */
 513   if (do_help)
 514     usage (EXIT_SUCCESS);
 515
 516   /* Verify selected options.  */
 517   if (!line_comment && sort_by_filepos)
 518     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
 519            "--no-location", "--sort-by-file");
 520
 521   if (sort_by_msgid && sort_by_filepos)
 522     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
 523            "--sort-output", "--sort-by-file");
 524
 525   if (join_existing && strcmp (default_domain, "-") == 0)
 526     error (EXIT_FAILURE, 0, _("\
 527 --join-existing cannot be used when output is written to stdout"));
 528
 529   if (no_default_keywords && !some_additional_keywords)
 530     {
 531       error (0, 0, _("\
 532 xgettext cannot work without keywords to look for"));
 533       usage (EXIT_FAILURE);
 534     }
 535
 536   /* Test whether we have some input files given.  */
 537   if (files_from == NULL && optind >= argc)
 538     {
 539       error (EXIT_SUCCESS, 0, _("no input file given"));
 540       usage (EXIT_FAILURE);
 541     }
 542
 543   /* Determine extractor from language.  */
 544   if (language != NULL)
 545     extractor = language_to_extractor (language);
 546
 547   /* Canonize msgstr prefix/suffix.  */
 548   if (msgstr_prefix != NULL && msgstr_suffix == NULL)
 549     msgstr_suffix = "";
 550   else if (msgstr_prefix == NULL && msgstr_suffix != NULL)
 551     msgstr_prefix = "";
 552
 553   /* Default output directory is the current directory.  */
 554   if (output_dir == NULL)
 555     output_dir = ".";
 556
 557   /* Construct the name of the output file.  If the default domain has
 558      the special name "-" we write to stdout.  */
 559   if (output_file)
 560     {
 561       if (IS_ABSOLUTE_PATH (output_file) || strcmp (output_file, "-") == 0)
 562         file_name = xstrdup (output_file);
 563       else
 564         /* Please do NOT add a .po suffix! */
 565         file_name = concatenated_pathname (output_dir, output_file, NULL);
 566     }
 567   else if (strcmp (default_domain, "-") == 0)
 568     file_name = "-";
 569   else
 570     file_name = concatenated_pathname (output_dir, default_domain, ".po");
 571
 572   /* Determine list of files we have to process.  */
 573   if (files_from != NULL)
 574     file_list = read_names_from_file (files_from);
 575   else
 576     file_list = string_list_alloc ();
 577   /* Append names from command line.  */
 578   for (cnt = optind; cnt < argc; ++cnt)
 579     string_list_append_unique (file_list, argv[cnt]);
 580
 581   /* Allocate converter from xgettext_global_source_encoding to UTF-8 (except
 582      from ASCII or UTF-8, when this conversion is a no-op).  */
 583   if (xgettext_global_source_encoding != po_charset_ascii
 584       && xgettext_global_source_encoding != po_charset_utf8)
 585     {
 586 #if HAVE_ICONV
 587       iconv_t cd;
 588
 589       /* Avoid glibc-2.1 bug with EUC-KR.  */
 590 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
 591       if (strcmp (xgettext_global_source_encoding, "EUC-KR") == 0)
 592         cd = (iconv_t)(-1);
 593       else
 594 # endif
 595       cd = iconv_open (po_charset_utf8, xgettext_global_source_encoding);
 596       if (cd == (iconv_t)(-1))
 597         error (EXIT_FAILURE, 0, _("\
 598 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
 599 and iconv() does not support this conversion."),
 600                xgettext_global_source_encoding, po_charset_utf8,
 601                basename (program_name));
 602       xgettext_global_source_iconv = cd;
 603 #else
 604       error (EXIT_FAILURE, 0, _("\
 605 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
 606 This version was built without iconv()."),
 607              xgettext_global_source_encoding, po_charset_utf8,
 608              basename (program_name));
 609 #endif
 610     }
 611
 612   /* Allocate a message list to remember all the messages.  */
 613   mdlp = msgdomain_list_alloc (true);
 614
 615   /* Generate a header, so that we know how and when this PO file was
 616      created.  */
 617   if (!xgettext_omit_header)
 618     message_list_append (mdlp->item[0]->messages, construct_header ());
 619
 620   /* Read in the old messages, so that we can add to them.  */
 621   if (join_existing)
 622     {
 623       /* Temporarily reset the directory list to empty, because file_name
 624          is an output file and therefore should not be searched for.  */
 625       void *saved_directory_list = dir_list_save_reset ();
 626       extractor_ty po_extractor = { extract_po, NULL, NULL, NULL };
 627
 628       extract_from_file (file_name, po_extractor, mdlp);
 629       if (!is_ascii_msgdomain_list (mdlp))
 630         mdlp = iconv_msgdomain_list (mdlp, "UTF-8", file_name);
 631
 632       dir_list_restore (saved_directory_list);
 633     }
 634
 635   /* Process all input files.  */
 636   for (cnt = 0; cnt < file_list->nitems; ++cnt)
 637     {
 638       const char *filename;
 639       extractor_ty this_file_extractor;
 640
 641       filename = file_list->item[cnt];
 642
 643       if (extractor.func)
 644         this_file_extractor = extractor;
 645       else
 646         {
 647           const char *base;
 648           char *reduced;
 649           const char *extension;
 650           const char *language;
 651
 652           base = strrchr (filename, '/');
 653           if (!base)
 654             base = filename;
 655
 656           reduced = xstrdup (base);
 657           /* Remove a trailing ".in" - it's a generic suffix.  */
 658           if (strlen (reduced) >= 3
 659               && memcmp (reduced + strlen (reduced) - 3, ".in", 3) == 0)
 660             reduced[strlen (reduced) - 3] = '\0';
 661
 662           /* Work out what the file extension is.  */
 663           extension = strrchr (reduced, '.');
 664           if (extension)
 665             ++extension;
 666           else
 667             extension = "";
 668
 669           /* Derive the language from the extension, and the extractor
 670              function from the language.  */
 671           language = extension_to_language (extension);
 672           if (language == NULL)
 673             {
 674               error (0, 0, _("\
 675 warning: file `%s' extension `%s' is unknown; will try C"), filename, extension);
 676               language = "C";
 677             }
 678           this_file_extractor = language_to_extractor (language);
 679
 680           free (reduced);
 681         }
 682
 683       /* Extract the strings from the file.  */
 684       extract_from_file (filename, this_file_extractor, mdlp);
 685     }
 686   string_list_free (file_list);
 687
 688   /* Finalize the constructed header.  */
 689   if (!xgettext_omit_header)
 690     finalize_header (mdlp);
 691
 692   /* Free the allocated converter.  */
 693 #if HAVE_ICONV
 694   if (xgettext_global_source_encoding != po_charset_ascii
 695       && xgettext_global_source_encoding != po_charset_utf8)
 696     iconv_close (xgettext_global_source_iconv);
 697 #endif
 698
 699   /* Sorting the list of messages.  */
 700   if (sort_by_filepos)
 701     msgdomain_list_sort_by_filepos (mdlp);
 702   else if (sort_by_msgid)
 703     msgdomain_list_sort_by_msgid (mdlp);
 704
 705   /* Write the PO file.  */
 706   msgdomain_list_print (mdlp, file_name, force_po, do_debug);
 707
 708   exit (EXIT_SUCCESS);
 709 }
 710
 711
 712 /* Display usage information and exit.  */
 713 static void
 714 usage (int status)
 715 {
 716   if (status != EXIT_SUCCESS)
 717     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 718              program_name);
 719   else
 720     {
 721       printf (_("\
 722 Usage: %s [OPTION] [INPUTFILE]...\n\
 723 "), program_name);
 724       printf ("\n");
 725       printf (_("\
 726 Extract translatable strings from given input files.\n\
 727 "));
 728       printf ("\n");
 729       /* xgettext: no-wrap */
 730       printf (_("\
 731 Mandatory arguments to long options are mandatory for short options too.\n\
 732 Similarly for optional arguments.\n\
 733 "));
 734       printf ("\n");
 735       printf (_("\
 736 Input file location:\n"));
 737       printf (_("\
 738   INPUTFILE ...               input files\n"));
 739       printf (_("\
 740   -f, --files-from=FILE       get list of input files from FILE\n"));
 741       printf (_("\
 742   -D, --directory=DIRECTORY   add DIRECTORY to list for input files search\n"));
 743       printf (_("\
 744 If input file is -, standard input is read.\n"));
 745       printf ("\n");
 746       printf (_("\
 747 Output file location:\n"));
 748       printf (_("\
 749   -d, --default-domain=NAME   use NAME.po for output (instead of messages.po)\n"));
 750       printf (_("\
 751   -o, --output=FILE           write output to specified file\n"));
 752       printf (_("\
 753   -p, --output-dir=DIR        output files will be placed in directory DIR\n"));
 754       printf (_("\
 755 If output file is -, output is written to standard output.\n"));
 756       printf ("\n");
 757       printf (_("\
 758 Choice of input file language:\n"));
 759       printf (_("\
 760   -L, --language=NAME         recognise the specified language\n\
 761                                 (C, C++, ObjectiveC, PO, Shell, Python, Lisp,\n\
 762                                 EmacsLisp, librep, Scheme, Smalltalk, Java,\n\
 763                                 JavaProperties, C#, awk, YCP, Tcl, Perl, PHP,\n\
 764                                 GCC-source, NXStringTable, RST, Glade)\n"));
 765       printf (_("\
 766   -C, --c++                   shorthand for --language=C++\n"));
 767       printf (_("\
 768 By default the language is guessed depending on the input file name extension.\n"));
 769       printf ("\n");
 770       printf (_("\
 771 Input file interpretation:\n"));
 772       printf (_("\
 773       --from-code=NAME        encoding of input files\n\
 774                                 (except for Python, Tcl, Glade)\n"));
 775       printf (_("\
 776 By default the input files are assumed to be in ASCII.\n"));
 777       printf ("\n");
 778       printf (_("\
 779 Operation mode:\n"));
 780       printf (_("\
 781   -j, --join-existing         join messages with existing file\n"));
 782       printf (_("\
 783   -x, --exclude-file=FILE.po  entries from FILE.po are not extracted\n"));
 784       printf (_("\
 785   -c, --add-comments[=TAG]    place comment block with TAG (or those\n\
 786                               preceding keyword lines) in output file\n"));
 787       printf ("\n");
 788       printf (_("\
 789 Language specific options:\n"));
 790       printf (_("\
 791   -a, --extract-all           extract all strings\n"));
 792       printf (_("\
 793                                 (only languages C, C++, ObjectiveC, Shell,\n\
 794                                 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
 795                                 C#, awk, Tcl, Perl, PHP, GCC-source, Glade)\n"));
 796       printf (_("\
 797   -k, --keyword[=WORD]        additional keyword to be looked for (without\n\
 798                               WORD means not to use default keywords)\n"));
 799       printf (_("\
 800                                 (only languages C, C++, ObjectiveC, Shell,\n\
 801                                 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
 802                                 C#, awk, Tcl, Perl, PHP, GCC-source, Glade)\n"));
 803       printf (_("\
 804       --flag=WORD:ARG:FLAG    additional flag for strings inside the argument\n\
 805                               number ARG of keyword WORD\n"));
 806       printf (_("\
 807                                 (only languages C, C++, ObjectiveC, Shell,\n\
 808                                 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
 809                                 C#, awk, YCP, Tcl, Perl, PHP, GCC-source)\n"));
 810       printf (_("\
 811   -T, --trigraphs             understand ANSI C trigraphs for input\n"));
 812       printf (_("\
 813                                 (only languages C, C++, ObjectiveC)\n"));
 814       printf (_("\
 815       --qt                    recognize Qt format strings\n"));
 816       printf (_("\
 817                                 (only language C++)\n"));
 818       printf (_("\
 819       --debug                 more detailed formatstring recognition result\n"));
 820       printf ("\n");
 821       printf (_("\
 822 Output details:\n"));
 823       printf (_("\
 824   -e, --no-escape             do not use C escapes in output (default)\n"));
 825       printf (_("\
 826   -E, --escape                use C escapes in output, no extended chars\n"));
 827       printf (_("\
 828       --force-po              write PO file even if empty\n"));
 829       printf (_("\
 830   -i, --indent                write the .po file using indented style\n"));
 831       printf (_("\
 832       --no-location           do not write '#: filename:line' lines\n"));
 833       printf (_("\
 834   -n, --add-location          generate '#: filename:line' lines (default)\n"));
 835       printf (_("\
 836       --strict                write out strict Uniforum conforming .po file\n"));
 837       printf (_("\
 838       --properties-output     write out a Java .properties file\n"));
 839       printf (_("\
 840       --stringtable-output    write out a NeXTstep/GNUstep .strings file\n"));
 841       printf (_("\
 842   -w, --width=NUMBER          set output page width\n"));
 843       printf (_("\
 844       --no-wrap               do not break long message lines, longer than\n\
 845                               the output page width, into several lines\n"));
 846       printf (_("\
 847   -s, --sort-output           generate sorted output\n"));
 848       printf (_("\
 849   -F, --sort-by-file          sort output by file location\n"));
 850       printf (_("\
 851       --omit-header           don't write header with `msgid \"\"' entry\n"));
 852       printf (_("\
 853       --copyright-holder=STRING  set copyright holder in output\n"));
 854       printf (_("\
 855       --foreign-user          omit FSF copyright in output for foreign user\n"));
 856       printf (_("\
 857       --msgid-bugs-address=EMAIL@ADDRESS  set report address for msgid bugs\n"));
 858       printf (_("\
 859   -m, --msgstr-prefix[=STRING]  use STRING or \"\" as prefix for msgstr entries\n"));
 860       printf (_("\
 861   -M, --msgstr-suffix[=STRING]  use STRING or \"\" as suffix for msgstr entries\n"));
 862       printf ("\n");
 863       printf (_("\
 864 Informative output:\n"));
 865       printf (_("\
 866   -h, --help                  display this help and exit\n"));
 867       printf (_("\
 868   -V, --version               output version information and exit\n"));
 869       printf ("\n");
 870       fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
 871              stdout);
 872     }
 873
 874   exit (status);
 875 }
 876
 877
 878 static void
 879 exclude_directive_domain (abstract_po_reader_ty *pop, char *name)
 880 {
 881   po_gram_error_at_line (&gram_pos,
 882                          _("this file may not contain domain directives"));
 883 }
 884
 885
 886 static void
 887 exclude_directive_message (abstract_po_reader_ty *pop,
 888                            char *msgid,
 889                            lex_pos_ty *msgid_pos,
 890                            char *msgid_plural,
 891                            char *msgstr, size_t msgstr_len,
 892                            lex_pos_ty *msgstr_pos,
 893                            bool force_fuzzy, bool obsolete)
 894 {
 895   message_ty *mp;
 896
 897   /* See if this message ID has been seen before.  */
 898   if (exclude == NULL)
 899     exclude = message_list_alloc (true);
 900   mp = message_list_search (exclude, msgid);
 901   if (mp != NULL)
 902     free (msgid);
 903   else
 904     {
 905       mp = message_alloc (msgid, msgid_plural, "", 1, msgstr_pos);
 906       /* Do not free msgid.  */
 907       message_list_append (exclude, mp);
 908     }
 909
 910   /* All we care about is the msgid.  Throw the msgstr away.
 911      Don't even check for duplicate msgids.  */
 912   free (msgstr);
 913 }
 914
 915
 916 /* So that the one parser can be used for multiple programs, and also
 917    use good data hiding and encapsulation practices, an object
 918    oriented approach has been taken.  An object instance is allocated,
 919    and all actions resulting from the parse will be through
 920    invocations of method functions of that object.  */
 921
/* Method table passed to po_reader_alloc() by read_exclusion_file().
   Only the two directive callbacks are provided; every other slot is
   left NULL.  */
static abstract_po_reader_class_ty exclude_methods =
{
  sizeof (abstract_po_reader_ty),
  NULL, /* constructor */
  NULL, /* destructor */
  NULL, /* parse_brief */
  NULL, /* parse_debrief */
  exclude_directive_domain, /* domain directive: reported as an error */
  exclude_directive_message, /* message: its msgid is put into `exclude' */
  NULL, /* comment */
  NULL, /* comment_dot */
  NULL, /* comment_filepos */
  NULL, /* comment_special */
};
 936
 937
 938 static void
 939 read_exclusion_file (char *filename)
 940 {
 941   char *real_filename;
 942   FILE *fp = open_po_file (filename, &real_filename, true);
 943   abstract_po_reader_ty *pop;
 944
 945   pop = po_reader_alloc (&exclude_methods);
 946   po_scan (pop, fp, real_filename, filename, input_syntax);
 947   po_reader_free (pop);
 948
 949   if (fp != stdin)
 950     fclose (fp);
 951 }
 952
 953
 954 void
 955 split_keywordspec (const char *spec,
 956                    const char **endp, int *argnum1p, int *argnum2p)
 957 {
 958   const char *p;
 959
 960   /* Start parsing from the end.  */
 961   p = spec + strlen (spec);
 962   if (p > spec && isdigit ((unsigned char) p[-1]))
 963     {
 964       const char *last_arg;
 965
 966       do
 967         p--;
 968       while (p > spec && isdigit ((unsigned char) p[-1]));
 969
 970       last_arg = p;
 971
 972       if (p > spec && p[-1] == ',')
 973         {
 974           p--;
 975
 976           if (p > spec && isdigit ((unsigned char) p[-1]))
 977             {
 978               const char *first_arg;
 979
 980               do
 981                 p--;
 982               while (p > spec && isdigit ((unsigned char) p[-1]));
 983
 984               first_arg = p;
 985
 986               if (p > spec && p[-1] == ':')
 987                 {
 988                   /* Parsed "KEYWORD:ARGNUM1,ARGNUM2".  */
 989                   char *dummy;
 990
 991                   *endp = p - 1;
 992                   *argnum1p = strtol (first_arg, &dummy, 10);
 993                   *argnum2p = strtol (last_arg, &dummy, 10);
 994                   return;
 995                 }
 996             }
 997         }
 998       else if (p > spec && p[-1] == ':')
 999         {
1000           /* Parsed "KEYWORD:ARGNUM1.  */
1001           char *dummy;
1002
1003           *endp = p - 1;
1004           *argnum1p = strtol (last_arg, &dummy, 10);
1005           *argnum2p = 0;
1006           return;
1007         }
1008     }
1009   /* Parsed "KEYWORD".  */
1010   *endp = p + strlen (p);
1011   *argnum1p = 0;
1012   *argnum2p = 0;
1013 }
1014
1015
/* Null context: no format decision and no pass-through for either of the
   two format slots.  flag_context_list_iterator_advance() returns it for
   arguments that have no flags recorded.
   NOTE(review): the initializer order is assumed to be is_format1,
   pass_format1, is_format2, pass_format2 - confirm against the
   declaration of flag_context_ty.  */
flag_context_ty null_context = { undecided, false, undecided, false };

/* Transparent context: both format slots are marked pass-through, so that
   inherited_context() takes their values from the outer context.  */
flag_context_ty passthrough_context = { undecided, true, undecided, true };
1021
1022
1023 flag_context_ty
1024 inherited_context (flag_context_ty outer_context,
1025                    flag_context_ty modifier_context)
1026 {
1027   flag_context_ty result = modifier_context;
1028
1029   if (result.pass_format1)
1030     {
1031       result.is_format1 = outer_context.is_format1;
1032       result.pass_format1 = false;
1033     }
1034   if (result.pass_format2)
1035     {
1036       result.is_format2 = outer_context.is_format2;
1037       result.pass_format2 = false;
1038     }
1039   return result;
1040 }
1041
1042
/* Null context list iterator: it has no list behind it (NULL), so
   flag_context_list_iterator_advance() returns null_context on every
   call.  */
flag_context_list_iterator_ty null_context_list_iterator = { 1, NULL };

/* Transparent context list iterator.
   It walks a one-element list for argument number 1 whose link points
   back at the element itself.  flag_context_list_iterator_advance()
   special-cases such a circular list and does not move past it, so the
   pass-through flags are returned for every argument.  */
static flag_context_list_ty passthrough_context_circular_list =
  {
    1,
    { undecided, true, undecided, true },
    &passthrough_context_circular_list
  };
flag_context_list_iterator_ty passthrough_context_list_iterator =
  {
    1,
    &passthrough_context_circular_list
  };
1058
1059
1060 flag_context_list_iterator_ty
1061 flag_context_list_iterator (flag_context_list_ty *list)
1062 {
1063   flag_context_list_iterator_ty result;
1064
1065   result.argnum = 1;
1066   result.head = list;
1067   return result;
1068 }
1069
1070
1071 flag_context_ty
1072 flag_context_list_iterator_advance (flag_context_list_iterator_ty *iter)
1073 {
1074   if (iter->head == NULL)
1075     return null_context;
1076   if (iter->argnum == iter->head->argnum)
1077     {
1078       flag_context_ty result = iter->head->flags;
1079
1080       /* Special casing of circular list.  */
1081       if (iter->head != iter->head->next)
1082         {
1083           iter->head = iter->head->next;
1084           iter->argnum++;
1085         }
1086
1087       return result;
1088     }
1089   else
1090     {
1091       iter->argnum++;
1092       return null_context;
1093     }
1094 }
1095
1096
1097 flag_context_list_ty *
1098 flag_context_list_table_lookup (flag_context_list_table_ty *flag_table,
1099                                 const void *key, size_t keylen)
1100 {
1101   void *entry;
1102
1103   if (flag_table->table != NULL
1104       && find_entry (flag_table, key, keylen, &entry) == 0)
1105     return (flag_context_list_ty *) entry;
1106   else
1107     return NULL;
1108 }
1109
1110
1111 static void
1112 flag_context_list_table_insert (flag_context_list_table_ty *table,
1113                                 unsigned int index,
1114                                 const char *name_start, const char *name_end,
1115                                 int argnum, enum is_format value, bool pass)
1116 {
1117   char *allocated_name = NULL;
1118
1119   if (table == &flag_table_lisp)
1120     {
1121       /* Convert NAME to upper case.  */
1122       size_t name_len = name_end - name_start;
1123       char *name = allocated_name = (char *) xallocsa (name_len);
1124       size_t i;
1125
1126       for (i = 0; i < name_len; i++)
1127         name[i] = (name_start[i] >= 'a' && name_start[i] <= 'z'
1128                    ? name_start[i] - 'a' + 'A'
1129                    : name_start[i]);
1130       name_start = name;
1131       name_end = name + name_len;
1132     }
1133   else if (table == &flag_table_tcl)
1134     {
1135       /* Remove redundant "::" prefix.  */
1136       if (name_end - name_start > 2
1137           && name_start[0] == ':' && name_start[1] == ':')
1138         name_start += 2;
1139     }
1140
1141   /* Insert the pair (VALUE, PASS) at INDEX in the element numbered ARGNUM
1142      of the list corresponding to NAME in the TABLE.  */
1143   if (table->table == NULL)
1144     init_hash (table, 100);
1145   {
1146     void *entry;
1147
1148     if (find_entry (table, name_start, name_end - name_start, &entry) != 0)
1149       {
1150         /* Create new hash table entry.  */
1151         flag_context_list_ty *list =
1152           (flag_context_list_ty *) xmalloc (sizeof (flag_context_list_ty));
1153         list->argnum = argnum;
1154         memset (&list->flags, '\0', sizeof (list->flags));
1155         switch (index)
1156           {
1157           case 0:
1158             list->flags.is_format1 = value;
1159             list->flags.pass_format1 = pass;
1160             break;
1161           case 1:
1162             list->flags.is_format2 = value;
1163             list->flags.pass_format2 = pass;
1164             break;
1165           default:
1166             abort ();
1167           }
1168         list->next = NULL;
1169         insert_entry (table, name_start, name_end - name_start, list);
1170       }
1171     else
1172       {
1173         flag_context_list_ty *list = (flag_context_list_ty *)entry;
1174         flag_context_list_ty **lastp = NULL;
1175
1176         while (list != NULL && list->argnum < argnum)
1177           {
1178             lastp = &list->next;
1179             list = *lastp;
1180           }
1181         if (list != NULL && list->argnum == argnum)
1182           {
1183             /* Add this flag to the current argument number.  */
1184             switch (index)
1185               {
1186               case 0:
1187                 list->flags.is_format1 = value;
1188                 list->flags.pass_format1 = pass;
1189                 break;
1190               case 1:
1191                 list->flags.is_format2 = value;
1192                 list->flags.pass_format2 = pass;
1193                 break;
1194               default:
1195                 abort ();
1196               }
1197           }
1198         else if (lastp != NULL)
1199           {
1200             /* Add a new list entry for this argument number.  */
1201             list =
1202               (flag_context_list_ty *) xmalloc (sizeof (flag_context_list_ty));
1203             list->argnum = argnum;
1204             memset (&list->flags, '\0', sizeof (list->flags));
1205             switch (index)
1206               {
1207               case 0:
1208                 list->flags.is_format1 = value;
1209                 list->flags.pass_format1 = pass;
1210                 break;
1211               case 1:
1212                 list->flags.is_format2 = value;
1213                 list->flags.pass_format2 = pass;
1214                 break;
1215               default:
1216                 abort ();
1217               }
1218             list->next = *lastp;
1219             *lastp = list;
1220           }
1221         else
1222           {
1223             /* Add a new list entry for this argument number, at the beginning
1224                of the list.  Since we don't have an API for replacing the
1225                value of a key in the hash table, we have to copy the first
1226                list element.  */
1227             flag_context_list_ty *copy =
1228               (flag_context_list_ty *) xmalloc (sizeof (flag_context_list_ty));
1229             *copy = *list;
1230
1231             list->argnum = argnum;
1232             memset (&list->flags, '\0', sizeof (list->flags));
1233             switch (index)
1234               {
1235               case 0:
1236                 list->flags.is_format1 = value;
1237                 list->flags.pass_format1 = pass;
1238                 break;
1239               case 1:
1240                 list->flags.is_format2 = value;
1241                 list->flags.pass_format2 = pass;
1242                 break;
1243               default:
1244                 abort ();
1245               }
1246             list->next = copy;
1247           }
1248       }
1249   }
1250
1251   if (allocated_name != NULL)
1252     freesa (allocated_name);
1253 }
1254
1255
1256 void
1257 xgettext_record_flag (const char *optionstring)
1258 {
1259   /* Check the string has at least two colons.  (Colons in the name are
1260      allowed, needed for the Lisp and the Tcl backends.)  */
1261   const char *colon1;
1262   const char *colon2;
1263
1264   for (colon2 = optionstring + strlen (optionstring); ; )
1265     {
1266       if (colon2 == optionstring)
1267         goto err;
1268       colon2--;
1269       if (*colon2 == ':')
1270         break;
1271     }
1272   for (colon1 = colon2; ; )
1273     {
1274       if (colon1 == optionstring)
1275         goto err;
1276       colon1--;
1277       if (*colon1 == ':')
1278         break;
1279     }
1280   {
1281     const char *name_start = optionstring;
1282     const char *name_end = colon1;
1283     const char *argnum_start = colon1 + 1;
1284     const char *argnum_end = colon2;
1285     const char *flag = colon2 + 1;
1286     int argnum;
1287
1288     /* Check the parts' syntax.  */
1289     if (name_end == name_start)
1290       goto err;
1291     if (argnum_end == argnum_start)
1292       goto err;
1293     {
1294       char *endp;
1295       argnum = strtol (argnum_start, &endp, 10);
1296       if (endp != argnum_end)
1297         goto err;
1298     }
1299     if (argnum <= 0)
1300       goto err;
1301
1302     /* Analyze the flag part.  */
1303     {
1304       bool pass;
1305
1306       pass = false;
1307       if (strlen (flag) >= 5 && memcmp (flag, "pass-", 5) == 0)
1308         {
1309           pass = true;
1310           flag += 5;
1311         }
1312
1313       /* Unlike po_parse_comment_special(), we don't accept "fuzzy" or "wrap"
1314          here - it has no sense.  */
1315       if (strlen (flag) >= 7
1316           && memcmp (flag + strlen (flag) - 7, "-format", 7) == 0)
1317         {
1318           const char *p;
1319           size_t n;
1320           enum is_format value;
1321           size_t type;
1322
1323           p = flag;
1324           n = strlen (flag) - 7;
1325
1326           if (n >= 3 && memcmp (p, "no-", 3) == 0)
1327             {
1328               p += 3;
1329               n -= 3;
1330               value = no;
1331             }
1332           else if (n >= 9 && memcmp (p, "possible-", 9) == 0)
1333             {
1334               p += 9;
1335               n -= 9;
1336               value = possible;
1337             }
1338           else if (n >= 11 && memcmp (p, "impossible-", 11) == 0)
1339             {
1340               p += 11;
1341               n -= 11;
1342               value = impossible;
1343             }
1344           else
1345             value = yes_according_to_context;
1346
1347           for (type = 0; type < NFORMATS; type++)
1348             if (strlen (format_language[type]) == n
1349                 && memcmp (format_language[type], p, n) == 0)
1350               {
1351                 switch (type)
1352                   {
1353                   case format_c:
1354                     flag_context_list_table_insert (&flag_table_c, 0,
1355                                                     name_start, name_end,
1356                                                     argnum, value, pass);
1357                     flag_context_list_table_insert (&flag_table_objc, 0,
1358                                                     name_start, name_end,
1359                                                     argnum, value, pass);
1360                     break;
1361                   case format_objc:
1362                     flag_context_list_table_insert (&flag_table_objc, 1,
1363                                                     name_start, name_end,
1364                                                     argnum, value, pass);
1365                     break;
1366                   case format_sh:
1367                     flag_context_list_table_insert (&flag_table_sh, 0,
1368                                                     name_start, name_end,
1369                                                     argnum, value, pass);
1370                     break;
1371                   case format_python:
1372                     flag_context_list_table_insert (&flag_table_python, 0,
1373                                                     name_start, name_end,
1374                                                     argnum, value, pass);
1375                     break;
1376                   case format_lisp:
1377                     flag_context_list_table_insert (&flag_table_lisp, 0,
1378                                                     name_start, name_end,
1379                                                     argnum, value, pass);
1380                     break;
1381                   case format_elisp:
1382                     flag_context_list_table_insert (&flag_table_elisp, 0,
1383                                                     name_start, name_end,
1384                                                     argnum, value, pass);
1385                     break;
1386                   case format_librep:
1387                     flag_context_list_table_insert (&flag_table_librep, 0,
1388                                                     name_start, name_end,
1389                                                     argnum, value, pass);
1390                     break;
1391                   case format_scheme:
1392                     flag_context_list_table_insert (&flag_table_scheme, 0,
1393                                                     name_start, name_end,
1394                                                     argnum, value, pass);
1395                     break;
1396                   case format_smalltalk:
1397                     break;
1398                   case format_java:
1399                     flag_context_list_table_insert (&flag_table_java, 0,
1400                                                     name_start, name_end,
1401                                                     argnum, value, pass);
1402                     break;
1403                   case format_csharp:
1404                     flag_context_list_table_insert (&flag_table_csharp, 0,
1405                                                     name_start, name_end,
1406                                                     argnum, value, pass);
1407                     break;
1408                   case format_awk:
1409                     flag_context_list_table_insert (&flag_table_awk, 0,
1410                                                     name_start, name_end,
1411                                                     argnum, value, pass);
1412                     break;
1413                   case format_pascal:
1414                     break;
1415                   case format_ycp:
1416                     flag_context_list_table_insert (&flag_table_ycp, 0,
1417                                                     name_start, name_end,
1418                                                     argnum, value, pass);
1419                     break;
1420                   case format_tcl:
1421                     flag_context_list_table_insert (&flag_table_tcl, 0,
1422                                                     name_start, name_end,
1423                                                     argnum, value, pass);
1424                     break;
1425                   case format_perl:
1426                     flag_context_list_table_insert (&flag_table_perl, 0,
1427                                                     name_start, name_end,
1428                                                     argnum, value, pass);
1429                     break;
1430                   case format_perl_brace:
1431                     flag_context_list_table_insert (&flag_table_perl, 1,
1432                                                     name_start, name_end,
1433                                                     argnum, value, pass);
1434                     break;
1435                   case format_php:
1436                     flag_context_list_table_insert (&flag_table_php, 0,
1437                                                     name_start, name_end,
1438                                                     argnum, value, pass);
1439                     break;
1440                   case format_gcc_internal:
1441                     flag_context_list_table_insert (&flag_table_gcc_internal, 0,
1442                                                     name_start, name_end,
1443                                                     argnum, value, pass);
1444                     break;
1445                   case format_qt:
1446                     flag_context_list_table_insert (&flag_table_c, 0,
1447                                                     name_start, name_end,
1448                                                     argnum, value, pass);
1449                     break;
1450                   default:
1451                     abort ();
1452                   }
1453                 return;
1454               }
1455           /* If the flag is not among the valid values, the optionstring is
1456              invalid.  */
1457         }
1458     }
1459   }
1460
1461 err:
1462   error (EXIT_FAILURE, 0, _("\
1463 A --flag argument doesn't have the <keyword>:<argnum>:[pass-]<flag> syntax: %s"),
1464          optionstring);
1465 }
1466
1467
1468 static string_list_ty *comment;
1469
1470 void
1471 xgettext_comment_add (const char *str)
1472 {
1473   if (comment == NULL)
1474     comment = string_list_alloc ();
1475   string_list_append (comment, str);
1476 }
1477
1478 const char *
1479 xgettext_comment (size_t n)
1480 {
1481   if (comment == NULL || n >= comment->nitems)
1482     return NULL;
1483   return comment->item[n];
1484 }
1485
1486 void
1487 xgettext_comment_reset ()
1488 {
1489   if (comment != NULL)
1490     {
1491       string_list_free (comment);
1492       comment = NULL;
1493     }
1494 }
1495
1496
1497 refcounted_string_list_ty *savable_comment;
1498
1499 void
1500 savable_comment_add (const char *str)
1501 {
1502   if (savable_comment == NULL)
1503     {
1504       savable_comment =
1505         (refcounted_string_list_ty *) xmalloc (sizeof (*savable_comment));
1506       savable_comment->refcount = 1;
1507       string_list_init (&savable_comment->contents);
1508     }
1509   else if (savable_comment->refcount > 1)
1510     {
1511       /* Unshare the list by making copies.  */
1512       struct string_list_ty *oldcontents;
1513       size_t i;
1514
1515       savable_comment->refcount--;
1516       oldcontents = &savable_comment->contents;
1517
1518       savable_comment =
1519         (refcounted_string_list_ty *) xmalloc (sizeof (*savable_comment));
1520       savable_comment->refcount = 1;
1521       string_list_init (&savable_comment->contents);
1522       for (i = 0; i < oldcontents->nitems; i++)
1523         string_list_append (&savable_comment->contents, oldcontents->item[i]);
1524     }
1525   string_list_append (&savable_comment->contents, str);
1526 }
1527
/* Forget the current savable comment: pass it to drop_reference() and
   clear the global pointer, so that the next savable_comment_add()
   starts a new list.  */
void
savable_comment_reset ()
{
  /* NOTE(review): relies on drop_reference() accepting NULL, since
     savable_comment may be NULL here - confirm against its definition.  */
  drop_reference (savable_comment);
  savable_comment = NULL;
}
1534
1535 void
1536 savable_comment_to_xgettext_comment (refcounted_string_list_ty *rslp)
1537 {
1538   xgettext_comment_reset ();
1539   if (rslp != NULL)
1540     {
1541       size_t i;
1542
1543       for (i = 0; i < rslp->contents.nitems; i++)
1544         xgettext_comment_add (rslp->contents.item[i]);
1545     }
1546 }
1547
1548
1549
/* Open the input file FN for reading and return the stream.
   - "-" denotes standard input.
   - An absolute file name is opened as it is.
   - Any other file name is looked up in the directories of the directory
     search list, in order; the first one in which the file can be opened
     wins.
   *REAL_FILE_NAME_P receives the name of the file actually opened, and
   *LOGICAL_FILE_NAME_P the name under which xgettext reports it; both
   are freshly allocated.  If the file cannot be opened, the program is
   terminated with an error message.  */
static FILE *
xgettext_open (const char *fn,
               char **logical_file_name_p, char **real_file_name_p)
{
  FILE *stream;
  char *real_name;
  char *logical_name;

  if (strcmp (fn, "-") == 0)
    {
      stream = stdin;
      real_name = xstrdup (_("standard input"));
      logical_name = xstrdup (real_name);
    }
  else if (IS_ABSOLUTE_PATH (fn))
    {
      real_name = xstrdup (fn);
      stream = fopen (fn, "r");
      if (stream == NULL)
        error (EXIT_FAILURE, errno, _("\
error while opening \"%s\" for reading"), fn);
      logical_name = xstrdup (real_name);
    }
  else
    {
      int idx = 0;

      for (;;)
        {
          const char *dir = dir_list_nth (idx);

          /* End of the search list: the file exists nowhere.  */
          if (dir == NULL)
            error (EXIT_FAILURE, ENOENT, _("\
error while opening \"%s\" for reading"), fn);

          real_name = concatenated_pathname (dir, fn, NULL);

          stream = fopen (real_name, "r");
          if (stream != NULL)
            break;

          /* Only a missing file lets the search continue; any other
             failure is fatal.  */
          if (errno != ENOENT)
            error (EXIT_FAILURE, errno, _("\
error while opening \"%s\" for reading"), real_name);
          free (real_name);
          idx++;
        }

      /* Here REAL_NAME is the file that was found along the directory
         search path, whereas the logical file name, which is what
         xgettext reports, is the file name that was searched for.  */
      logical_name = xstrdup (fn);
    }

  *logical_file_name_p = logical_name;
  *real_file_name_p = real_name;
  return stream;
}
1609
1610
/* Language dependent format string parser.
   NULL if the language has no notion of format strings.
   Both variables are set by extract_from_file() from the extractor in
   use.  set_format_flags_from_context() pairs the first one with the
   is_format1 field of a flag context and the second one with its
   is_format2 field.  */
static struct formatstring_parser *current_formatstring_parser1;
static struct formatstring_parser *current_formatstring_parser2;
1615
1616
1617 static void
1618 extract_from_file (const char *file_name, extractor_ty extractor,
1619                    msgdomain_list_ty *mdlp)
1620 {
1621   char *logical_file_name;
1622   char *real_file_name;
1623   FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name);
1624
1625   /* Set the default for the source file encoding.  May be overridden by
1626      the extractor function.  */
1627   xgettext_current_source_encoding = xgettext_global_source_encoding;
1628 #if HAVE_ICONV
1629   xgettext_current_source_iconv = xgettext_global_source_iconv;
1630 #endif
1631
1632   current_formatstring_parser1 = extractor.formatstring_parser1;
1633   current_formatstring_parser2 = extractor.formatstring_parser2;
1634   extractor.func (fp, real_file_name, logical_file_name, extractor.flag_table,
1635                   mdlp);
1636
1637   if (fp != stdin)
1638     fclose (fp);
1639   free (logical_file_name);
1640   free (real_file_name);
1641 }
1642
1643
1644
#if !HAVE_ICONV
/* If we don't have iconv(), the only supported values for
   xgettext_global_source_encoding and thus also for
   xgettext_current_source_encoding are ASCII and UTF-8.
   convert_string() should not be called in this case.
   This stand-in therefore aborts when it is evaluated.  The CD argument
   is dropped, and the trailing (string) operand merely gives the
   expression a value, so that code which uses the result still
   compiles.  */
#define convert_string(cd,string) (abort (), (string))
#endif
1652
1653 /* Convert the given string from xgettext_current_source_encoding to
1654    the output file encoding (i.e. ASCII or UTF-8).
1655    The resulting string is either the argument string, or freshly allocated.
1656    The file_name and line_number are only used for error message purposes.  */
1657 char *
1658 from_current_source_encoding (const char *string,
1659                               const char *file_name, size_t line_number)
1660 {
1661   if (xgettext_current_source_encoding == po_charset_ascii)
1662     {
1663       if (!is_ascii_string (string))
1664         {
1665           char buffer[21];
1666
1667           if (line_number == (size_t)(-1))
1668             buffer[0] = '\0';
1669           else
1670             sprintf (buffer, ":%ld", (long) line_number);
1671           multiline_error (xstrdup (""),
1672                            xasprintf (_("\
1673 Non-ASCII string at %s%s.\n\
1674 Please specify the source encoding through --from-code.\n"),
1675                                       file_name, buffer));
1676           exit (EXIT_FAILURE);
1677         }
1678     }
1679   else if (xgettext_current_source_encoding != po_charset_utf8)
1680     string = convert_string (xgettext_current_source_iconv, string);
1681
1682   return (char *) string;
1683 }
1684
/* Replace the variable STRING by its conversion from the current source
   encoding, as computed by from_current_source_encoding().
   The expansion refers to a variable named `pos', a pointer to something
   with file_name and line_number members, which must be in scope where
   the macro is used.  The expansion already ends in a semicolon.  */
#define CONVERT_STRING(string) \
  string = from_current_source_encoding (string, pos->file_name, \
                                         pos->line_number);
1688
1689
1690 /* Update the is_format[] flags depending on the information given in the
1691    context.  */
1692 static void
1693 set_format_flags_from_context (enum is_format is_format[NFORMATS],
1694                                flag_context_ty context, const char *string,
1695                                lex_pos_ty *pos, const char *pretty_msgstr)
1696 {
1697   size_t i;
1698
1699   if (context.is_format1 != undecided || context.is_format2 != undecided)
1700     for (i = 0; i < NFORMATS; i++)
1701       {
1702         if (is_format[i] == undecided)
1703           {
1704             if (formatstring_parsers[i] == current_formatstring_parser1
1705                 && context.is_format1 != undecided)
1706               is_format[i] = (enum is_format) context.is_format1;
1707             if (formatstring_parsers[i] == current_formatstring_parser2
1708                 && context.is_format2 != undecided)
1709               is_format[i] = (enum is_format) context.is_format2;
1710           }
1711         if (possible_format_p (is_format[i]))
1712           {
1713             struct formatstring_parser *parser = formatstring_parsers[i];
1714             char *invalid_reason = NULL;
1715             void *descr = parser->parse (string, false, &invalid_reason);
1716
1717             if (descr != NULL)
1718               parser->free (descr);
1719             else
1720               {
1721                 /* The string is not a valid format string.  */
1722                 if (is_format[i] != possible)
1723                   {
1724                     char buffer[21];
1725
1726                     error_with_progname = false;
1727                     if (pos->line_number == (size_t)(-1))
1728                       buffer[0] = '\0';
1729                     else
1730                       sprintf (buffer, ":%ld", (long) pos->line_number);
1731                     multiline_warning (xasprintf (_("%s%s: warning: "),
1732                                                   pos->file_name, buffer),
1733                                        xasprintf (is_format[i] == yes_according_to_context ? _("Although being used in a format string position, the %s is not a valid %s format string. Reason: %s\n") : _("Although declared as such, the %s is not a valid %s format string. Reason: %s\n"),
1734                                                   pretty_msgstr,
1735                                                   format_language_pretty[i],
1736                                                   invalid_reason));
1737                     error_with_progname = true;
1738                   }
1739
1740                 is_format[i] = impossible;
1741                 free (invalid_reason);
1742               }
1743           }
1744       }
1745 }
1746
1747
1748 message_ty *
1749 remember_a_message (message_list_ty *mlp, char *string,
1750                     flag_context_ty context, lex_pos_ty *pos)
1751 {
1752   enum is_format is_format[NFORMATS];
1753   enum is_wrap do_wrap;
1754   char *msgid;
1755   message_ty *mp;
1756   char *msgstr;
1757   size_t i;
1758
1759   msgid = string;
1760
1761   /* See whether we shall exclude this message.  */
1762   if (exclude != NULL && message_list_search (exclude, msgid) != NULL)
1763     {
1764       /* Tell the lexer to reset its comment buffer, so that the next
1765          message gets the correct comments.  */
1766       xgettext_comment_reset ();
1767
1768       return NULL;
1769     }
1770
1771   for (i = 0; i < NFORMATS; i++)
1772     is_format[i] = undecided;
1773   do_wrap = undecided;
1774
1775   CONVERT_STRING (msgid);
1776
1777   if (msgid[0] == '\0' && !xgettext_omit_header)
1778     {
1779       char buffer[21];
1780
1781       error_with_progname = false;
1782       if (pos->line_number == (size_t)(-1))
1783         buffer[0] = '\0';
1784       else
1785         sprintf (buffer, ":%ld", (long) pos->line_number);
1786       multiline_warning (xasprintf (_("%s%s: warning: "), pos->file_name,
1787                                     buffer),
1788                          xstrdup (_("\
1789 Empty msgid.  It is reserved by GNU gettext:\n\
1790 gettext(\"\") returns the header entry with\n\
1791 meta information, not the empty string.\n")));
1792       error_with_progname = true;
1793     }
1794
1795   /* See if we have seen this message before.  */
1796   mp = message_list_search (mlp, msgid);
1797   if (mp != NULL)
1798     {
1799       free (msgid);
1800       for (i = 0; i < NFORMATS; i++)
1801         is_format[i] = mp->is_format[i];
1802       do_wrap = mp->do_wrap;
1803     }
1804   else
1805     {
1806       static lex_pos_ty dummypos = { __FILE__, __LINE__ };
1807
1808       /* Construct the msgstr from the prefix and suffix, otherwise use the
1809          empty string.  */
1810       if (msgstr_prefix)
1811         {
1812           msgstr = (char *) xmalloc (strlen (msgstr_prefix)
1813                                      + strlen (msgid)
1814                                      + strlen (msgstr_suffix) + 1);
1815           stpcpy (stpcpy (stpcpy (msgstr, msgstr_prefix), msgid),
1816                   msgstr_suffix);
1817         }
1818       else
1819         msgstr = "";
1820
1821       /* Allocate a new message and append the message to the list.  */
1822       mp = message_alloc (msgid, NULL, msgstr, strlen (msgstr) + 1, &dummypos);
1823       /* Do not free msgid.  */
1824       message_list_append (mlp, mp);
1825     }
1826
1827   /* Determine whether the context specifies that the msgid is a format
1828      string.  */
1829   set_format_flags_from_context (is_format, context, mp->msgid, pos, "msgid");
1830
1831   /* Ask the lexer for the comments it has seen.  */
1832   {
1833     size_t nitems_before;
1834     size_t nitems_after;
1835     int j;
1836     bool add_all_remaining_comments;
1837
1838     nitems_before = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
1839
1840     add_all_remaining_comments = add_all_comments;
1841     for (j = 0; ; ++j)
1842       {
1843         const char *s = xgettext_comment (j);
1844         const char *t;
1845         if (s == NULL)
1846           break;
1847
1848         CONVERT_STRING (s);
1849
1850         /* To reduce the possibility of unwanted matches we do a two
1851            step match: the line must contain `xgettext:' and one of
1852            the possible format description strings.  */
1853         if ((t = strstr (s, "xgettext:")) != NULL)
1854           {
1855             bool tmp_fuzzy;
1856             enum is_format tmp_format[NFORMATS];
1857             enum is_wrap tmp_wrap;
1858             bool interesting;
1859
1860             t += strlen ("xgettext:");
1861
1862             po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_wrap);
1863
1864             interesting = false;
1865             for (i = 0; i < NFORMATS; i++)
1866               if (tmp_format[i] != undecided)
1867                 {
1868                   is_format[i] = tmp_format[i];
1869                   interesting = true;
1870                 }
1871             if (tmp_wrap != undecided)
1872               {
1873                 do_wrap = tmp_wrap;
1874                 interesting = true;
1875               }
1876
1877             /* If the "xgettext:" marker was followed by an interesting
1878                keyword, and we updated our is_format/do_wrap variables,
1879                we don't print the comment as a #. comment.  */
1880             if (interesting)
1881               continue;
1882           }
1883         /* When the comment tag is seen, it drags in not only the line
1884            which it starts, but all remaining comment lines.  */
1885         if (add_all_remaining_comments
1886             || (add_all_remaining_comments =
1887                   (comment_tag != NULL
1888                    && strncmp (s, comment_tag, strlen (comment_tag)) == 0)))
1889           message_comment_dot_append (mp, s);
1890       }
1891
1892     nitems_after = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
1893
1894     /* Don't add the comments if they are a repetition of the tail of the
1895        already present comments.  This avoids unneeded duplication if the
1896        same message appears several times, each time with the same comment.  */
1897     if (nitems_before < nitems_after)
1898       {
1899         size_t added = nitems_after - nitems_before;
1900
1901         if (added <= nitems_before)
1902           {
1903             bool repeated = true;
1904
1905             for (i = 0; i < added; i++)
1906               if (strcmp (mp->comment_dot->item[nitems_before - added + i],
1907                           mp->comment_dot->item[nitems_before + i]) != 0)
1908                 {
1909                   repeated = false;
1910                   break;
1911                 }
1912
1913             if (repeated)
1914               {
1915                 for (i = 0; i < added; i++)
1916                   free ((char *) mp->comment_dot->item[nitems_before + i]);
1917                 mp->comment_dot->nitems = nitems_before;
1918               }
1919           }
1920       }
1921   }
1922
1923   /* If it is not already decided, through programmer comments, whether the
1924      msgid is a format string, examine the msgid.  This is a heuristic.  */
1925   for (i = 0; i < NFORMATS; i++)
1926     {
1927       if (is_format[i] == undecided
1928           && (formatstring_parsers[i] == current_formatstring_parser1
1929               || formatstring_parsers[i] == current_formatstring_parser2)
1930           /* But avoid redundancy: objc-format is stronger than c-format.  */
1931           && !(i == format_c && possible_format_p (is_format[format_objc]))
1932           && !(i == format_objc && possible_format_p (is_format[format_c])))
1933         {
1934           struct formatstring_parser *parser = formatstring_parsers[i];
1935           char *invalid_reason = NULL;
1936           void *descr = parser->parse (mp->msgid, false, &invalid_reason);
1937
1938           if (descr != NULL)
1939             {
1940               /* msgid is a valid format string.  We mark only those msgids
1941                  as format strings which contain at least one format directive
1942                  and thus are format strings with a high probability.  We
1943                  don't mark strings without directives as format strings,
1944                  because that would force the programmer to add
1945                  "xgettext: no-c-format" anywhere where a translator wishes
1946                  to use a percent sign.  So, the msgfmt checking will not be
1947                  perfect.  Oh well.  */
1948               if (parser->get_number_of_directives (descr) > 0)
1949                 is_format[i] = possible;
1950
1951               parser->free (descr);
1952             }
1953           else
1954             {
1955               /* msgid is not a valid format string.  */
1956               is_format[i] = impossible;
1957               free (invalid_reason);
1958             }
1959         }
1960       mp->is_format[i] = is_format[i];
1961     }
1962
1963   mp->do_wrap = do_wrap == no ? no : yes;       /* By default we wrap.  */
1964
1965   /* Remember where we saw this msgid.  */
1966   if (line_comment)
1967     message_comment_filepos (mp, pos->file_name, pos->line_number);
1968
1969   /* Tell the lexer to reset its comment buffer, so that the next
1970      message gets the correct comments.  */
1971   xgettext_comment_reset ();
1972
1973   return mp;
1974 }
1975
1976
1977 void
1978 remember_a_message_plural (message_ty *mp, char *string,
1979                            flag_context_ty context, lex_pos_ty *pos)
1980 {
1981   char *msgid_plural;
1982   char *msgstr1;
1983   size_t msgstr1_len;
1984   char *msgstr;
1985   size_t i;
1986
1987   msgid_plural = string;
1988
1989   CONVERT_STRING (msgid_plural);
1990
1991   /* See if the message is already a plural message.  */
1992   if (mp->msgid_plural == NULL)
1993     {
1994       mp->msgid_plural = msgid_plural;
1995
1996       /* Construct the first plural form from the prefix and suffix,
1997          otherwise use the empty string.  The translator will have to
1998          provide additional plural forms.  */
1999       if (msgstr_prefix)
2000         {
2001           msgstr1 = (char *) xmalloc (strlen (msgstr_prefix)
2002                                       + strlen (msgid_plural)
2003                                       + strlen (msgstr_suffix) + 1);
2004           stpcpy (stpcpy (stpcpy (msgstr1, msgstr_prefix), msgid_plural),
2005                   msgstr_suffix);
2006         }
2007       else
2008         msgstr1 = "";
2009       msgstr1_len = strlen (msgstr1) + 1;
2010       msgstr = (char *) xmalloc (mp->msgstr_len + msgstr1_len);
2011       memcpy (msgstr, mp->msgstr, mp->msgstr_len);
2012       memcpy (msgstr + mp->msgstr_len, msgstr1, msgstr1_len);
2013       mp->msgstr = msgstr;
2014       mp->msgstr_len = mp->msgstr_len + msgstr1_len;
2015
2016       /* Determine whether the context specifies that the msgid_plural is a
2017          format string.  */
2018       set_format_flags_from_context (mp->is_format, context, mp->msgid_plural,
2019                                      pos, "msgid_plural");
2020
2021       /* If it is not already decided, through programmer comments or
2022          the msgid, whether the msgid is a format string, examine the
2023          msgid_plural.  This is a heuristic.  */
2024       for (i = 0; i < NFORMATS; i++)
2025         if ((formatstring_parsers[i] == current_formatstring_parser1
2026              || formatstring_parsers[i] == current_formatstring_parser2)
2027             && (mp->is_format[i] == undecided || mp->is_format[i] == possible)
2028             /* But avoid redundancy: objc-format is stronger than c-format.  */
2029             && !(i == format_c
2030                  && possible_format_p (mp->is_format[format_objc]))
2031             && !(i == format_objc
2032                  && possible_format_p (mp->is_format[format_c])))
2033           {
2034             struct formatstring_parser *parser = formatstring_parsers[i];
2035             char *invalid_reason = NULL;
2036             void *descr =
2037               parser->parse (mp->msgid_plural, false, &invalid_reason);
2038
2039             if (descr != NULL)
2040               {
2041                 /* Same heuristic as in remember_a_message.  */
2042                 if (parser->get_number_of_directives (descr) > 0)
2043                   mp->is_format[i] = possible;
2044
2045                 parser->free (descr);
2046               }
2047             else
2048               {
2049                 /* msgid_plural is not a valid format string.  */
2050                 mp->is_format[i] = impossible;
2051                 free (invalid_reason);
2052               }
2053           }
2054     }
2055   else
2056     free (msgid_plural);
2057 }
2058
2059
2060 static message_ty *
2061 construct_header ()
2062 {
2063   time_t now;
2064   char *timestring;
2065   message_ty *mp;
2066   char *msgstr;
2067   static lex_pos_ty pos = { __FILE__, __LINE__ };
2068
2069   if (msgid_bugs_address != NULL && msgid_bugs_address[0] == '\0')
2070     multiline_warning (xasprintf (_("warning: ")),
2071                        xstrdup (_("\
2072 The option --msgid-bugs-address was not specified.\n\
2073 If you are using a `Makevars' file, please specify\n\
2074 the MSGID_BUGS_ADDRESS variable there; otherwise please\n\
2075 specify an --msgid-bugs-address command line option.\n\
2076 ")));
2077
2078   time (&now);
2079   timestring = po_strftime (&now);
2080
2081   msgstr = xasprintf ("\
2082 Project-Id-Version: PACKAGE VERSION\n\
2083 Report-Msgid-Bugs-To: %s\n\
2084 POT-Creation-Date: %s\n\
2085 PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n\
2086 Last-Translator: FULL NAME <EMAIL@ADDRESS>\n\
2087 Language-Team: LANGUAGE <LL@li.org>\n\
2088 MIME-Version: 1.0\n\
2089 Content-Type: text/plain; charset=CHARSET\n\
2090 Content-Transfer-Encoding: 8bit\n",
2091                       msgid_bugs_address != NULL ? msgid_bugs_address : "",
2092                       timestring);
2093   free (timestring);
2094
2095   mp = message_alloc ("", NULL, msgstr, strlen (msgstr) + 1, &pos);
2096
2097   message_comment_append (mp,
2098                           copyright_holder[0] != '\0'
2099                           ? xasprintf ("\
2100 SOME DESCRIPTIVE TITLE.\n\
2101 Copyright (C) YEAR %s\n\
2102 This file is distributed under the same license as the PACKAGE package.\n\
2103 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n",
2104                                        copyright_holder)
2105                           : "\
2106 SOME DESCRIPTIVE TITLE.\n\
2107 This file is put in the public domain.\n\
2108 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n");
2109
2110   mp->is_fuzzy = true;
2111
2112   return mp;
2113 }
2114
2115 static void
2116 finalize_header (msgdomain_list_ty *mdlp)
2117 {
2118   /* If the generated PO file has plural forms, add a Plural-Forms template
2119      to the constructed header.  */
2120   {
2121     bool has_plural;
2122     size_t i, j;
2123
2124     has_plural = false;
2125     for (i = 0; i < mdlp->nitems; i++)
2126       {
2127         message_list_ty *mlp = mdlp->item[i]->messages;
2128
2129         for (j = 0; j < mlp->nitems; j++)
2130           {
2131             message_ty *mp = mlp->item[j];
2132
2133             if (mp->msgid_plural != NULL)
2134               {
2135                 has_plural = true;
2136                 break;
2137               }
2138           }
2139         if (has_plural)
2140           break;
2141       }
2142
2143     if (has_plural)
2144       {
2145         message_ty *header = message_list_search (mdlp->item[0]->messages, "");
2146         if (header != NULL
2147             && strstr (header->msgstr, "Plural-Forms:") == NULL)
2148           {
2149             size_t insertpos = strlen (header->msgstr);
2150             const char *suffix;
2151             size_t suffix_len;
2152             char *new_msgstr;
2153
2154             suffix = "\nPlural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n";
2155             if (insertpos == 0 || header->msgstr[insertpos-1] == '\n')
2156               suffix++;
2157             suffix_len = strlen (suffix);
2158             new_msgstr = (char *) xmalloc (header->msgstr_len + suffix_len);
2159             memcpy (new_msgstr, header->msgstr, insertpos);
2160             memcpy (new_msgstr + insertpos, suffix, suffix_len);
2161             memcpy (new_msgstr + insertpos + suffix_len,
2162                     header->msgstr + insertpos,
2163                     header->msgstr_len - insertpos);
2164             header->msgstr = new_msgstr;
2165             header->msgstr_len = header->msgstr_len + suffix_len;
2166           }
2167       }
2168   }
2169
2170   /* If not all the strings were plain ASCII, or if the output syntax
2171      requires a charset conversion, set the charset in the header to UTF-8.
2172      All messages have already been converted to UTF-8 in remember_a_message
2173      and remember_a_message_plural.  */
2174   {
2175     bool has_nonascii = false;
2176     size_t i;
2177
2178     for (i = 0; i < mdlp->nitems; i++)
2179       {
2180         message_list_ty *mlp = mdlp->item[i]->messages;
2181
2182         if (!is_ascii_message_list (mlp))
2183           has_nonascii = true;
2184       }
2185
2186     if (has_nonascii
2187         || output_syntax == syntax_properties
2188         || output_syntax == syntax_stringtable)
2189       {
2190         message_list_ty *mlp = mdlp->item[0]->messages;
2191
2192         iconv_message_list (mlp, po_charset_utf8, po_charset_utf8, NULL);
2193       }
2194   }
2195 }
2196
2197
/* Number of elements of the array A.  A must be a real array, not a
   pointer.  The argument is parenthesized so that expressions such as a
   dereferenced cast are measured correctly.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
/* Pointer just past the last element of the array A.  */
#define ENDOF(a) ((a) + SIZEOF (a))
2200
2201
2202 static extractor_ty
2203 language_to_extractor (const char *name)
2204 {
2205   struct table_ty
2206   {
2207     const char *name;
2208     extractor_func func;
2209     flag_context_list_table_ty *flag_table;
2210     struct formatstring_parser *formatstring_parser1;
2211     struct formatstring_parser *formatstring_parser2;
2212   };
2213   typedef struct table_ty table_ty;
2214
2215   static table_ty table[] =
2216   {
2217     SCANNERS_C
2218     SCANNERS_PO
2219     SCANNERS_SH
2220     SCANNERS_PYTHON
2221     SCANNERS_LISP
2222     SCANNERS_ELISP
2223     SCANNERS_LIBREP
2224     SCANNERS_SCHEME
2225     SCANNERS_SMALLTALK
2226     SCANNERS_JAVA
2227     SCANNERS_PROPERTIES
2228     SCANNERS_CSHARP
2229     SCANNERS_AWK
2230     SCANNERS_YCP
2231     SCANNERS_TCL
2232     SCANNERS_PERL
2233     SCANNERS_PHP
2234     SCANNERS_STRINGTABLE
2235     SCANNERS_RST
2236     SCANNERS_GLADE
2237     /* Here may follow more languages and their scanners: pike, etc...
2238        Make sure new scanners honor the --exclude-file option.  */
2239   };
2240
2241   table_ty *tp;
2242
2243   for (tp = table; tp < ENDOF(table); ++tp)
2244     if (c_strcasecmp (name, tp->name) == 0)
2245       {
2246         extractor_ty result;
2247
2248         result.func = tp->func;
2249         result.flag_table = tp->flag_table;
2250         result.formatstring_parser1 = tp->formatstring_parser1;
2251         result.formatstring_parser2 = tp->formatstring_parser2;
2252
2253         /* Handle --qt.  It's preferrable to handle this facility here rather
2254            than through an option --language=C++/Qt because the latter would
2255            conflict with the language "C++" regarding the file extensions.  */
2256         if (recognize_format_qt && strcmp (tp->name, "C++") == 0)
2257           result.formatstring_parser2 = &formatstring_qt;
2258
2259         return result;
2260       }
2261
2262   error (EXIT_FAILURE, 0, _("language `%s' unknown"), name);
2263   /* NOTREACHED */
2264   {
2265     extractor_ty result = { NULL, NULL, NULL, NULL };
2266     return result;
2267   }
2268 }
2269
2270
2271 static const char *
2272 extension_to_language (const char *extension)
2273 {
2274   struct table_ty
2275   {
2276     const char *extension;
2277     const char *language;
2278   };
2279   typedef struct table_ty table_ty;
2280
2281   static table_ty table[] =
2282   {
2283     EXTENSIONS_C
2284     EXTENSIONS_PO
2285     EXTENSIONS_SH
2286     EXTENSIONS_PYTHON
2287     EXTENSIONS_LISP
2288     EXTENSIONS_ELISP
2289     EXTENSIONS_LIBREP
2290     EXTENSIONS_SCHEME
2291     EXTENSIONS_SMALLTALK
2292     EXTENSIONS_JAVA
2293     EXTENSIONS_PROPERTIES
2294     EXTENSIONS_CSHARP
2295     EXTENSIONS_AWK
2296     EXTENSIONS_YCP
2297     EXTENSIONS_TCL
2298     EXTENSIONS_PERL
2299     EXTENSIONS_PHP
2300     EXTENSIONS_STRINGTABLE
2301     EXTENSIONS_RST
2302     EXTENSIONS_GLADE
2303     /* Here may follow more file extensions... */
2304   };
2305
2306   table_ty *tp;
2307
2308   for (tp = table; tp < ENDOF(table); ++tp)
2309     if (strcmp (extension, tp->extension) == 0)
2310       return tp->language;
2311   return NULL;
2312 }