gnu/dist/gettext/gettext-tools/src/x-awk.c

   1 /* xgettext awk backend.
   2    Copyright (C) 2002-2003 Free Software Foundation, Inc.
   3
   4    This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software Foundation,
  18    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include "config.h"
  22 #endif
  23
  24 #include <errno.h>
  25 #include <stdbool.h>
  26 #include <stdio.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29
  30 #include "message.h"
  31 #include "xgettext.h"
  32 #include "x-awk.h"
  33 #include "error.h"
  34 #include "error-progname.h"
  35 #include "xalloc.h"
  36 #include "exit.h"
  37 #include "gettext.h"
  38
  39 #define _(s) gettext(s)
  40
  41
  42 /* The awk syntax is defined in the gawk manual page and documentation.
  43    See also gawk/awkgram.y.  */
  44
  45
  46 /* ====================== Keyword set customization.  ====================== */
  47
  48 /* If true extract all strings.  */
  49 static bool extract_all = false;
  50
     /* Keyword table filled by x_awk_keyword().  Maps a keyword name to its
        argument numbers, packed as argnum1 + (argnum2 << 10).  */
  51 static hash_table keywords;
     /* True until x_awk_keyword (NULL) has been called or init_keywords() has
        installed the built-in keywords.  */
  52 static bool default_keywords = true;
  53
  54
  55 void
  56 x_awk_extract_all ()
  57 {
       /* From now on extract_parenthesized() remembers every plain string
          literal, whether or not it is an argument of a keyword.  */
  58   extract_all = true;
  59 }
  60
  61
  62 void
  63 x_awk_keyword (const char *name)
  64 {
  65   if (name == NULL)
  66     default_keywords = false;
  67   else
  68     {
  69       const char *end;
  70       int argnum1;
  71       int argnum2;
  72       const char *colon;
  73
  74       if (keywords.table == NULL)
  75         init_hash (&keywords, 100);
  76
  77       split_keywordspec (name, &end, &argnum1, &argnum2);
  78
  79       /* The characters between name and end should form a valid C identifier.
  80          A colon means an invalid parse in split_keywordspec().  */
  81       colon = strchr (name, ':');
  82       if (colon == NULL || colon >= end)
  83         {
  84           if (argnum1 == 0)
  85             argnum1 = 1;
  86           insert_entry (&keywords, name, end - name,
  87                         (void *) (long) (argnum1 + (argnum2 << 10)));
  88         }
  89     }
  90 }
  91
  92 /* Finish initializing the keywords hash table.
  93    Called after argument processing, before each file is processed.  */
  94 static void
  95 init_keywords ()
  96 {
  97   if (default_keywords)
  98     {
  99       x_awk_keyword ("dcgettext");
 100       x_awk_keyword ("dcngettext:1,2");
 101       default_keywords = false;
 102     }
 103 }
 104
 /* Record the format string flags of the awk functions known to this
    backend, by passing each specification to xgettext_record_flag().  */
void
init_flag_table_awk ()
{
  static const char *const flag_specs[] =
    {
      "dcgettext:1:pass-awk-format",
      "dcngettext:1:pass-awk-format",
      "dcngettext:2:pass-awk-format",
      "printf:1:awk-format"
    };
  size_t i;

  for (i = 0; i < sizeof (flag_specs) / sizeof (flag_specs[0]); i++)
    xgettext_record_flag (flag_specs[i]);
}
 113
 114
 115 /* ======================== Reading of characters.  ======================== */
 116
 117 /* Real filename, used in error messages about the input file.  */
 118 static const char *real_file_name;
 119
 120 /* Logical filename and line number, used to label the extracted messages.
        line_number is maintained by phase1_getc() and phase1_ungetc().  */
 121 static char *logical_file_name;
 122 static int line_number;
 123
 124 /* The input file stream.  Set by extract_awk() while a file is being
        scanned and reset to NULL afterwards.  */
 125 static FILE *fp;
 126
 127 /* These are for tracking whether comments count as immediately before
 128    keyword.  last_comment_line is the line on which the most recent '#'
        comment started (set in phase2_getc); last_non_comment_line is the line
        of the most recent token (set in x_awk_lex).  */
 129 static int last_comment_line;
 130 static int last_non_comment_line;
 131
 132
 133 /* 1. line_number handling.  */
 134
 135 static int
 136 phase1_getc ()
 137 {
 138   int c = getc (fp);
 139
 140   if (c == EOF)
 141     {
 142       if (ferror (fp))
 143         error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
 144                real_file_name);
 145       return EOF;
 146     }
 147
 148   if (c == '\n')
 149     line_number++;
 150
 151   return c;
 152 }
 153
 154 /* Supports only one pushback character.  */
 155 static void
 156 phase1_ungetc (int c)
 157 {
 158   if (c != EOF)
 159     {
 160       if (c == '\n')
 161         --line_number;
 162
 163       ungetc (c, fp);
 164     }
 165 }
 166
 167
 168 /* 2. Replace each comment that is not inside a string literal or regular
 169    expression with a newline character.  We need to remember the comment
 170    for later, because it may be attached to a keyword string.  */
 171
 172 static int
 173 phase2_getc ()
 174 {
 175   static char *buffer;
 176   static size_t bufmax;
 177   size_t buflen;
 178   int lineno;
 179   int c;
 180
 181   c = phase1_getc ();
 182   if (c == '#')
 183     {
 184       buflen = 0;
 185       lineno = line_number;
 186       for (;;)
 187         {
 188           c = phase1_getc ();
 189           if (c == '\n' || c == EOF)
 190             break;
 191           /* We skip all leading white space, but not EOLs.  */
 192           if (!(buflen == 0 && (c == ' ' || c == '\t')))
 193             {
 194               if (buflen >= bufmax)
 195                 {
 196                   bufmax = 2 * bufmax + 10;
 197                   buffer = xrealloc (buffer, bufmax);
 198                 }
 199               buffer[buflen++] = c;
 200             }
 201         }
 202       if (buflen >= bufmax)
 203         {
 204           bufmax = 2 * bufmax + 10;
 205           buffer = xrealloc (buffer, bufmax);
 206         }
 207       buffer[buflen] = '\0';
 208       xgettext_comment_add (buffer);
 209       last_comment_line = lineno;
 210     }
 211   return c;
 212 }
 213
 /* Push C back through phase 1; EOF is ignored.
    Supports only one pushback character.  */
static void
phase2_ungetc (int c)
{
  if (c == EOF)
    return;

  phase1_ungetc (c);
}
 221
 222
 223 /* ========================== Reading of tokens.  ========================== */
 224
 225
 226 enum token_type_ty
 227 {
 228   token_type_eof,               /* end of input */
 229   token_type_lparen,            /* ( */
 230   token_type_rparen,            /* ) */
 231   token_type_comma,             /* , */
 232   token_type_string,            /* "abc" */
 233   token_type_i18nstring,        /* _"abc" */
 234   token_type_symbol,            /* symbol, number */
 235   token_type_semicolon,         /* ; */
 236   token_type_other              /* regexp, misc. operator */
 237 };
 238 typedef enum token_type_ty token_type_ty;
 239
     /* One token as produced by x_awk_lex().  */
 240 typedef struct token_ty token_ty;
 241 struct token_ty
 242 {
 243   token_type_ty type;
 244   char *string;         /* for token_type_{symbol,string,i18nstring};
                                allocated with xstrdup() by x_awk_lex() */
 245   int line_number;      /* value of line_number just before the token's
                                first character was read */
 246 };
 247
 248
 249 /* 7. Replace escape sequences within character strings with their
 250    single character equivalents.  */
 251
 252 #define P7_QUOTES (1000 + '"')
 253
 254 static int
 255 phase7_getc ()
 256 {
 257   int c;
 258
 259   for (;;)
 260     {
 261       /* Use phase 1, because phase 2 elides comments.  */
 262       c = phase1_getc ();
 263
 264       if (c == EOF || c == '\n')
 265         break;
 266       if (c == '"')
 267         return P7_QUOTES;
 268       if (c != '\\')
 269         return c;
 270       c = phase1_getc ();
 271       if (c == EOF)
 272         break;
 273       if (c != '\n')
 274         switch (c)
 275           {
 276           case 'a':
 277             return '\a';
 278           case 'b':
 279             return '\b';
 280           case 'f':
 281             return '\f';
 282           case 'n':
 283             return '\n';
 284           case 'r':
 285             return '\r';
 286           case 't':
 287             return '\t';
 288           case 'v':
 289             return '\v';
 290           case '0': case '1': case '2': case '3': case '4':
 291           case '5': case '6': case '7':
 292             {
 293               int n = c - '0';
 294
 295               c = phase1_getc ();
 296               if (c != EOF)
 297                 {
 298                   if (c >= '0' && c <= '7')
 299                     {
 300                       n = (n << 3) + (c - '0');
 301                       c = phase1_getc ();
 302                       if (c != EOF)
 303                         {
 304                           if (c >= '0' && c <= '7')
 305                             n = (n << 3) + (c - '0');
 306                           else
 307                             phase1_ungetc (c);
 308                         }
 309                     }
 310                   else
 311                     phase1_ungetc (c);
 312                 }
 313               return (unsigned char) n;
 314             }
 315           case 'x':
 316             {
 317               int n = 0;
 318
 319               for (;;)
 320                 {
 321                   c = phase1_getc ();
 322                   if (c == EOF)
 323                     break;
 324                   else if (c >= '0' && c <= '9')
 325                     n = (n << 4) + (c - '0');
 326                   else if (c >= 'A' && c <= 'F')
 327                     n = (n << 4) + (c - 'A' + 10);
 328                   else if (c >= 'a' && c <= 'f')
 329                     n = (n << 4) + (c - 'a' + 10);
 330                   else
 331                     {
 332                       phase1_ungetc (c);
 333                       break;
 334                     }
 335                 }
 336               return (unsigned char) n;
 337             }
 338           default:
 339             return c;
 340           }
 341     }
 342
 343   phase1_ungetc (c);
 344   error_with_progname = false;
 345   error (0, 0, _("%s:%d: warning: unterminated string"), logical_file_name,
 346          line_number);
 347   error_with_progname = true;
 348   return P7_QUOTES;
 349 }
 350
 351
 352 /* Free the memory pointed to by a 'struct token_ty'.  */
 353 static inline void
 354 free_token (token_ty *tp)
 355 {
 356   switch (tp->type)
 357     {
 358     case token_type_string:
 359     case token_type_i18nstring:
 360     case token_type_symbol:
 361       free (tp->string);
 362       break;
 363     default:
 364       break;
 365     }
 366 }
 367
 368
 369 /* Combine characters into tokens.  Discard whitespace.  */
 370
 371 /* There is an ambiguity about '/': It can start a division operator ('/' or
 372    '/=') or it can start a regular expression.  The distinction is important
 373    because inside regular expressions, '#' and '"' lose their special meanings.
 374    If you look at the awk grammar, you see that the operator is only allowed
 375    right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals
 376    can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals.
 377    So we prefer the division operator interpretation only right after
 378    symbol, string, number, ')', ']', with whitespace but no newline allowed
 379    in between.  */
 380 static bool prefer_division_over_regexp;
 381
     /* Read the next token from the input and store it in *TP.
        TP->type and TP->line_number are always set.  TP->string is set, to a
        string allocated with xstrdup(), only for symbol, string and i18nstring
        tokens.  Before returning a token other than eof, the function also
        updates prefer_division_over_regexp, which decides how a '/' that
        follows this token is interpreted.  */
 382 static void
 383 x_awk_lex (token_ty *tp)
 384 {
       /* Scratch buffer shared by the symbol and the string cases; it is kept
          across calls and only ever grows.  */
 385   static char *buffer;
 386   static int bufmax;
 387   int bufpos;
 388   int c;
 389
 390   for (;;)
 391     {
 392       tp->line_number = line_number;
 393       c = phase2_getc ();
 394
           /* First switch: end of input and everything that is skipped.  */
 395       switch (c)
 396         {
 397         case EOF:
 398           tp->type = token_type_eof;
 399           return;
 400
 401         case '\n':
 402           if (last_non_comment_line > last_comment_line)
 403             xgettext_comment_reset ();
 404           /* Newline is not allowed inside expressions.  It usually
 405              introduces a fresh statement.
 406              FIXME: Newlines after any of ',' '{' '?' ':' '||' '&&' 'do' 'else'
 407              do *not* introduce a fresh statement.  */
 408           prefer_division_over_regexp = false;
 409           /* FALLTHROUGH */
 410         case '\t':
 411         case ' ':
 412           /* Ignore whitespace and comments.  */
 413           continue;
 414
 415         case '\\':
 416           /* Backslash ought to be immediately followed by a newline.
                  Only the backslash is dropped here; whatever follows it is
                  read by the next iteration.  */
 417           continue;
 418         }
 419
 420       last_non_comment_line = tp->line_number;
 421
           /* Second switch: the actual tokens.  */
 422       switch (c)
 423         {
 424         case '.':
 425           {
                 /* A '.' followed by a digit starts a number; any other '.'
                    is a token of its own.  */
 426             int c2 = phase2_getc ();
 427             phase2_ungetc (c2);
 428             if (!(c2 >= '0' && c2 <= '9'))
 429               {
 430
 431                 tp->type = token_type_other;
 432                 prefer_division_over_regexp = false;
 433                 return;
 434               }
 435           }
 436           /* FALLTHROUGH */
 437         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 438         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 439         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 440         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 441         case 'Y': case 'Z':
 442         case '_':
 443         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 444         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 445         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 446         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 447         case 'y': case 'z':
 448         case '0': case '1': case '2': case '3': case '4':
 449         case '5': case '6': case '7': case '8': case '9':
 450           /* Symbol, or part of a number.  */
 451           bufpos = 0;
 452           for (;;)
 453             {
 454               if (bufpos >= bufmax)
 455                 {
 456                   bufmax = 2 * bufmax + 10;
 457                   buffer = xrealloc (buffer, bufmax);
 458                 }
 459               buffer[bufpos++] = c;
 460               c = phase2_getc ();
 461               switch (c)
 462                 {
 463                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 464                 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 465                 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 466                 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 467                 case 'Y': case 'Z':
 468                 case '_':
 469                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 470                 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 471                 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 472                 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 473                 case 'y': case 'z':
 474                 case '0': case '1': case '2': case '3': case '4':
 475                 case '5': case '6': case '7': case '8': case '9':
 476                   continue;
 477                 default:
                       /* A lone '_' directly followed by '"' is the prefix of
                          an i18n string _"...".  The quote has been consumed,
                          so continue with the string body.  */
 478                   if (bufpos == 1 && buffer[0] == '_' && c == '"')
 479                     {
 480                       tp->type = token_type_i18nstring;
 481                       goto case_string;
 482                     }
 483                   phase2_ungetc (c);
 484                   break;
 485                 }
 486               break;
 487             }
 488           if (bufpos >= bufmax)
 489             {
 490               bufmax = 2 * bufmax + 10;
 491               buffer = xrealloc (buffer, bufmax);
 492             }
 493           buffer[bufpos] = '\0';
 494           tp->string = xstrdup (buffer);
 495           tp->type = token_type_symbol;
 496           /* Most identifiers can be variable names; after them we must
 497              interpret '/' as division operator.  But for awk's builtin
 498              keywords we have three cases:
 499              (a) Must interpret '/' as division operator. "length".
 500              (b) Must interpret '/' as start of a regular expression.
 501                  "do", "exit", "print", "printf", "return".
 502              (c) '/' after this keyword is invalid anyway. All others.
 503              I used the following script for the distinction.
 504                 for k in $awk_keywords; do
 505                   echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null
 506                 done
 507            */
 508           if (strcmp (buffer, "do") == 0
 509               || strcmp (buffer, "exit") == 0
 510               || strcmp (buffer, "print") == 0
 511               || strcmp (buffer, "printf") == 0
 512               || strcmp (buffer, "return") == 0)
 513             prefer_division_over_regexp = false;
 514           else
 515             prefer_division_over_regexp = true;
 516           return;
 517
 518         case '"':
 519           tp->type = token_type_string;
 520         case_string:
               /* Also reached by goto from the symbol case, with tp->type
                  already set to token_type_i18nstring.  Collect the decoded
                  characters up to the closing quote.  */
 521           bufpos = 0;
 522           for (;;)
 523             {
 524               c = phase7_getc ();
 525               if (c == EOF || c == P7_QUOTES)
 526                 break;
 527               if (bufpos >= bufmax)
 528                 {
 529                   bufmax = 2 * bufmax + 10;
 530                   buffer = xrealloc (buffer, bufmax);
 531                 }
 532               buffer[bufpos++] = c;
 533             }
 534           if (bufpos >= bufmax)
 535             {
 536               bufmax = 2 * bufmax + 10;
 537               buffer = xrealloc (buffer, bufmax);
 538             }
 539           buffer[bufpos] = '\0';
 540           tp->string = xstrdup (buffer);
 541           prefer_division_over_regexp = true;
 542           return;
 543
 544         case '(':
 545           tp->type = token_type_lparen;
 546           prefer_division_over_regexp = false;
 547           return;
 548
 549         case ')':
 550           tp->type = token_type_rparen;
 551           prefer_division_over_regexp = true;
 552           return;
 553
 554         case ',':
 555           tp->type = token_type_comma;
 556           prefer_division_over_regexp = false;
 557           return;
 558
 559         case ';':
 560           tp->type = token_type_semicolon;
 561           prefer_division_over_regexp = false;
 562           return;
 563
 564         case ']':
 565           tp->type = token_type_other;
 566           prefer_division_over_regexp = true;
 567           return;
 568
 569         case '/':
 570           if (!prefer_division_over_regexp)
 571             {
 572               /* Regular expression.
 573                  Counting brackets is non-trivial. [[] is balanced, and so is
 574                  [\]]. Also, /[/]/ is balanced and ends at the third slash.
 575                  Do not count [ or ] if either one is preceded by a \.
 576                  A '[' should be counted if
 577                   a) it is the first one so far (brackets == 0), or
 578                   b) it is the '[' in '[:'.
 579                  A ']' should be counted if not preceded by a \.
 580                  According to POSIX, []] is how you put a ] into a set.
 581                  Try to handle that too.
 582                */
 583               int brackets = 0;
 584               bool pos0 = true;         /* true at start of regexp */
 585               bool pos1_open = false;   /* true after [ at start of regexp */
 586               bool pos2_open_not = false; /* true after [^ at start of regexp */
 587
                   /* Read with phase 1, so that '#' and '"' inside the regular
                      expression are not treated as comment or string start.  */
 588               for (;;)
 589                 {
 590                   c = phase1_getc ();
 591
 592                   if (c == EOF || c == '\n')
 593                     {
 594                       phase1_ungetc (c);
 595                       error_with_progname = false;
 596                       error (0, 0, _("%s:%d: warning: unterminated regular expression"),
 597                              logical_file_name, line_number);
 598                       error_with_progname = true;
 599                       break;
 600                     }
 601                   else if (c == '[')
 602                     {
 603                       if (brackets == 0)
 604                         brackets++;
 605                       else
 606                         {
                               /* Peek at the next character: only '[:' opens
                                  a nested bracket.  */
 607                           c = phase1_getc ();
 608                           if (c == ':')
 609                             brackets++;
 610                           phase1_ungetc (c);
 611                         }
 612                       if (pos0)
 613                         {
 614                           pos0 = false;
 615                           pos1_open = true;
 616                           continue;
 617                         }
 618                     }
 619                   else if (c == ']')
 620                     {
                           /* A ']' right after '[' or '[^' at the start of the
                              regexp is a literal and does not close the set.  */
 621                       if (!(pos1_open || pos2_open_not))
 622                         brackets--;
 623                     }
 624                   else if (c == '^')
 625                     {
 626                       if (pos1_open)
 627                         {
 628                           pos1_open = false;
 629                           pos2_open_not = true;
 630                           continue;
 631                         }
 632                     }
 633                   else if (c == '\\')
 634                     {
                           /* Skip the escaped character, whatever it is.  */
 635                       c = phase1_getc ();
 636                       /* Backslash-newline is valid and ignored.  */
 637                     }
 638                   else if (c == '/')
 639                     {
                           /* A '/' outside of any bracket ends the regexp.  */
 640                       if (brackets <= 0)
 641                         break;
 642                     }
 643
                       /* Any character that did not 'continue' above ends the
                          special start-of-regexp states.  */
 644                   pos0 = false;
 645                   pos1_open = false;
 646                   pos2_open_not = false;
 647                 }
 648
 649               tp->type = token_type_other;
 650               prefer_division_over_regexp = false;
 651               return;
 652             }
 653           /* FALLTHROUGH */
 654
 655         default:
 656           /* We could carefully recognize each of the 2 and 3 character
 657              operators, but it is not necessary, as we only need to recognize
 658              gettext invocations.  Don't bother.  */
 659           tp->type = token_type_other;
 660           prefer_division_over_regexp = false;
 661           return;
 662         }
 663     }
 664 }
 665
 666
 667 /* ========================= Extracting strings.  ========================== */
 668
 669
 670 /* Context lookup table.  Set by extract_awk() from its flag_table
        argument; extract_parenthesized() looks up every symbol token in it to
        obtain the context iterator used for that symbol's arguments.  */
 671 static flag_context_list_table_ty *flag_context_list_table;
 672
 673
 674 /* The file is broken into tokens.  Scan the token stream, looking for
 675    a keyword, followed by a left paren, followed by a string.  When we
 676    see this sequence, we have something to remember.  We assume we are
 677    looking at a valid awk program, and leave the complaints about
 678    the grammar to the awk interpreter.
 679
 680      Normal handling: Look for
 681        keyword ( ... msgid ... )
 682      Plural handling: Look for
 683        keyword ( ... msgid ... msgid_plural ... )
 684
 685    We use recursion because the arguments before msgid or between msgid
 686    and msgid_plural can contain subexpressions of the same form.  */
 687
 688
 689 /* Extract messages until the next balanced closing parenthesis.
 690    Extracted messages are added to MLP.
        OUTER_CONTEXT is the flag context of the enclosing expression, and
        CONTEXT_ITER yields, one per argument, the context that is combined
        with it for the argument currently being scanned.
 691    When a specific argument shall be extracted, COMMAS_TO_SKIP >= 0 and,
 692    if also a plural argument shall be extracted, PLURAL_COMMAS > 0,
 693    otherwise PLURAL_COMMAS = 0.
 694    When no specific argument shall be extracted, COMMAS_TO_SKIP < 0.
 695    Return true upon eof, false upon closing parenthesis.  */
 696 static bool
 697 extract_parenthesized (message_list_ty *mlp,
 698                        flag_context_ty outer_context,
 699                        flag_context_list_iterator_ty context_iter,
 700                        int commas_to_skip, int plural_commas)
 701 {
 702   /* Remember the message containing the msgid, for msgid_plural.  */
 703   message_ty *plural_mp = NULL;
 704
 705   /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
 706   int state;
 707   /* Parameters of the keyword just seen.  Defined only in state 1.  */
 708   int next_commas_to_skip = -1;
 709   int next_plural_commas = 0;
 710   /* Whether to implicitly assume the next tokens are arguments even without
 711      a '('.  */
 712   bool next_is_argument = false;
 713   /* Context iterator that will be used if the next token is a '('.  */
 714   flag_context_list_iterator_ty next_context_iter =
 715     passthrough_context_list_iterator;
 716   /* Current context.  */
 717   flag_context_ty inner_context =
 718     inherited_context (outer_context,
 719                        flag_context_list_iterator_advance (&context_iter));
 720
 721   /* Start state is 0.  */
 722   state = 0;
 723
 724   for (;;)
 725     {
 726       token_ty token;
 727
 728       x_awk_lex (&token);
 729
 730       if (next_is_argument && token.type != token_type_lparen)
 731         {
 732           /* An argument list starts, even though there is no '('.  */
 733           context_iter = next_context_iter;
 734           outer_context = inner_context;
 735           inner_context =
 736             inherited_context (outer_context,
 737                                flag_context_list_iterator_advance (
 738                                  &context_iter));
 739         }
 740
 741       switch (token.type)
 742         {
 743         case token_type_symbol:
 744           {
 745             void *keyword_value;
 746
 747             if (find_entry (&keywords, token.string, strlen (token.string),
 748                             &keyword_value)
 749                 == 0)
 750               {
                     /* Unpack the argument numbers stored by x_awk_keyword():
                        argnum1 in the low 10 bits, argnum2 above them.  */
 751                 int argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
 752                 int argnum2 = (int) (long) keyword_value >> 10;
 753
 754                 next_commas_to_skip = argnum1 - 1;
 755                 next_plural_commas = (argnum2 > argnum1 ? argnum2 - argnum1 : 0);
 756                 state = 1;
 757               }
 758             else
 759               state = 0;
 760           }
               /* After "print" and "printf" the following tokens are treated
                  as an argument list even without a '('.  */
 761           next_is_argument =
 762             (strcmp (token.string, "print") == 0
 763              || strcmp (token.string, "printf") == 0);
 764           next_context_iter =
 765             flag_context_list_iterator (
 766               flag_context_list_table_lookup (
 767                 flag_context_list_table,
 768                 token.string, strlen (token.string)));
 769           free (token.string);
 770           continue;
 771
 772         case token_type_lparen:
               /* Recurse for the parenthesized list.  The keyword parameters
                  are passed on only if the '(' directly follows a keyword.  */
 773           if (extract_parenthesized (mlp, inner_context, next_context_iter,
 774                                      state ? next_commas_to_skip : -1,
 775                                      state ? next_plural_commas : 0))
 776             return true;
 777           next_is_argument = false;
 778           next_context_iter = null_context_list_iterator;
 779           state = 0;
 780           continue;
 781
 782         case token_type_rparen:
 783           return false;
 784
 785         case token_type_comma:
               /* Count down towards the argument to extract.  When the comma
                  after the msgid argument is reached and a plural is pending,
                  start counting towards msgid_plural; otherwise stop
                  extracting in this argument list.  */
 786           if (commas_to_skip >= 0)
 787             {
 788               if (commas_to_skip > 0)
 789                 commas_to_skip--;
 790               else
 791                 if (plural_mp != NULL && plural_commas > 0)
 792                   {
 793                     commas_to_skip = plural_commas - 1;
 794                     plural_commas = 0;
 795                   }
 796                 else
 797                   commas_to_skip = -1;
 798             }
               /* Move on to the context of the next argument.  */
 799           inner_context =
 800             inherited_context (outer_context,
 801                                flag_context_list_iterator_advance (
 802                                  &context_iter));
 803           next_is_argument = false;
 804           next_context_iter = passthrough_context_list_iterator;
 805           state = 0;
 806           continue;
 807
 808         case token_type_string:
 809           {
 810             lex_pos_ty pos;
 811             pos.file_name = logical_file_name;
 812             pos.line_number = token.line_number;
 813
                 /* token.string is freed here only when it is not passed to
                    remember_a_message*(); presumably those functions take
                    ownership of it -- TODO confirm.  */
 814             if (extract_all)
 815               remember_a_message (mlp, token.string, inner_context, &pos);
 816             else
 817               {
 818                 if (commas_to_skip == 0)
 819                   {
 820                     if (plural_mp == NULL)
 821                       {
 822                         /* Seen an msgid.  */
 823                         message_ty *mp =
 824                           remember_a_message (mlp, token.string,
 825                                               inner_context, &pos);
 826                         if (plural_commas > 0)
 827                           plural_mp = mp;
 828                       }
 829                     else
 830                       {
 831                         /* Seen an msgid_plural.  */
 832                         remember_a_message_plural (plural_mp, token.string,
 833                                                    inner_context, &pos);
 834                         plural_mp = NULL;
 835                       }
 836                   }
 837                 else
 838                   free (token.string);
 839               }
 840           }
 841           next_is_argument = false;
 842           next_context_iter = null_context_list_iterator;
 843           state = 0;
 844           continue;
 845
 846         case token_type_i18nstring:
               /* A _"..." string is always extracted, independently of
                  keywords and argument positions.  */
 847           {
 848             lex_pos_ty pos;
 849             pos.file_name = logical_file_name;
 850             pos.line_number = token.line_number;
 851
 852             remember_a_message (mlp, token.string, inner_context, &pos);
 853           }
 854           next_is_argument = false;
 855           next_context_iter = null_context_list_iterator;
 856           state = 0;
 857           continue;
 858
 859         case token_type_semicolon:
 860           /* An argument list ends, and a new statement begins.  */
 861           /* FIXME: Should handle newline that acts as statement separator
 862              in the same way.  */
 863           /* FIXME: Instead of resetting outer_context here, it may be better
 864              to recurse in the next_is_argument handling above, waiting for
 865              the next semicolon or other statement terminator.  */
 866           outer_context = null_context;
 867           context_iter = null_context_list_iterator;
 868           next_is_argument = false;
 869           next_context_iter = passthrough_context_list_iterator;
 870           inner_context =
 871             inherited_context (outer_context,
 872                                flag_context_list_iterator_advance (
 873                                  &context_iter));
 874           state = 0;
 875           continue;
 876
 877         case token_type_eof:
 878           return true;
 879
 880         case token_type_other:
 881           next_is_argument = false;
 882           next_context_iter = null_context_list_iterator;
 883           state = 0;
 884           continue;
 885
 886         default:
 887           abort ();
 888         }
 889     }
 890 }
 891
 892
 893 void
 894 extract_awk (FILE *f,
 895              const char *real_filename, const char *logical_filename,
 896              flag_context_list_table_ty *flag_table,
 897              msgdomain_list_ty *mdlp)
 898 {
 899   message_list_ty *mlp = mdlp->item[0]->messages;
 900
 901   fp = f;
 902   real_file_name = real_filename;
 903   logical_file_name = xstrdup (logical_filename);
 904   line_number = 1;
 905
 906   last_comment_line = -1;
 907   last_non_comment_line = -1;
 908
 909   prefer_division_over_regexp = false;
 910
 911   flag_context_list_table = flag_table;
 912
 913   init_keywords ();
 914
 915   /* Eat tokens until eof is seen.  When extract_parenthesized returns
 916      due to an unbalanced closing parenthesis, just restart it.  */
 917   while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
 918                                  -1, 0))
 919     ;
 920
 921   fp = NULL;
 922   real_file_name = NULL;
 923   logical_file_name = NULL;
 924   line_number = 0;
 925 }