gnu/dist/gettext/gettext-tools/src/x-librep.c

   1 /* xgettext librep backend.
   2    Copyright (C) 2001-2003 Free Software Foundation, Inc.
   3
   4    This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software Foundation,
  18    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include "config.h"
  22 #endif
  23
  24 #include <ctype.h>
  25 #include <errno.h>
  26 #include <stdbool.h>
  27 #include <stdio.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30
  31 #include "message.h"
  32 #include "xgettext.h"
  33 #include "x-librep.h"
  34 #include "error.h"
  35 #include "xalloc.h"
  36 #include "exit.h"
  37 #include "hash.h"
  38 #include "gettext.h"
  39
  40 #define _(s) gettext(s)
  41
  42
  43 /* Summary of librep syntax:
  44    - ';' starts a comment until end of line.
  45    - Block comments start with '#|' and end with '|#'.
  46    - Numbers are constituted of an optional prefix (#b, #B for binary,
  47      #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
  48      #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
  49      the digits.
  50    - Characters are written as '?' followed by the character, possibly
  51      with an escape sequence, for examples '?a', '?\n', '?\177'.
  52    - Strings are delimited by double quotes. Backslash introduces an escape
  53      sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
  54      '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
  55    - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
  56      if preceded by backslash or enclosed in |...|.
  57    - Keywords: written as #:SYMBOL.
  58    - () delimit lists.
  59    - [] delimit vectors.
  60    The reader is implemented in librep-0.14/src/lisp.c.  */
  61
  62
  63 /* ====================== Keyword set customization.  ====================== */
  64
  65 /* If true extract all strings.  */
  66 static bool extract_all = false;
  67
  68 static hash_table keywords;
  69 static bool default_keywords = true;
  70
  71
  72 void
  73 x_librep_extract_all ()
  74 {
  75   extract_all = true;
  76 }
  77
  78
  79 void
  80 x_librep_keyword (const char *name)
  81 {
  82   if (name == NULL)
  83     default_keywords = false;
  84   else
  85     {
  86       const char *end;
  87       int argnum1;
  88       int argnum2;
  89       const char *colon;
  90
  91       if (keywords.table == NULL)
  92         init_hash (&keywords, 100);
  93
  94       split_keywordspec (name, &end, &argnum1, &argnum2);
  95
  96       /* The characters between name and end should form a valid Lisp
  97          symbol.  */
  98       colon = strchr (name, ':');
  99       if (colon == NULL || colon >= end)
 100         {
 101           if (argnum1 == 0)
 102             argnum1 = 1;
 103           insert_entry (&keywords, name, end - name,
 104                         (void *) (long) (argnum1 + (argnum2 << 10)));
 105         }
 106     }
 107 }
 108
 109 /* Finish initializing the keywords hash table.
 110    Called after argument processing, before each file is processed.  */
 111 static void
 112 init_keywords ()
 113 {
 114   if (default_keywords)
 115     {
 116       x_librep_keyword ("_");
 117       default_keywords = false;
 118     }
 119 }
 120
 121 void
 122 init_flag_table_librep ()
 123 {
 124   xgettext_record_flag ("_:1:pass-librep-format");
 125   xgettext_record_flag ("format:2:librep-format");
 126 }
 127
 128
 129 /* ======================== Reading of characters.  ======================== */
 130
 131 /* Real filename, used in error messages about the input file.  */
 132 static const char *real_file_name;
 133
 134 /* Logical filename and line number, used to label the extracted messages.  */
 135 static char *logical_file_name;
 136 static int line_number;
 137
 138 /* The input file stream.  */
 139 static FILE *fp;
 140
 141
 142 /* Fetch the next character from the input file.  */
 143 static int
 144 do_getc ()
 145 {
 146   int c = getc (fp);
 147
 148   if (c == EOF)
 149     {
 150       if (ferror (fp))
 151         error (EXIT_FAILURE, errno, _("\
 152 error while reading \"%s\""), real_file_name);
 153     }
 154   else if (c == '\n')
 155    line_number++;
 156
 157   return c;
 158 }
 159
 160 /* Put back the last fetched character, not EOF.  */
 161 static void
 162 do_ungetc (int c)
 163 {
 164   if (c == '\n')
 165     line_number--;
 166   ungetc (c, fp);
 167 }
 168
 169
 170 /* ========================== Reading of tokens.  ========================== */
 171
 172
 173 /* A token consists of a sequence of characters.  */
 174 struct token
 175 {
 176   int allocated;                /* number of allocated 'token_char's */
 177   int charcount;                /* number of used 'token_char's */
 178   char *chars;                  /* the token's constituents */
 179 };
 180
 181 /* Initialize a 'struct token'.  */
 182 static inline void
 183 init_token (struct token *tp)
 184 {
 185   tp->allocated = 10;
 186   tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
 187   tp->charcount = 0;
 188 }
 189
 190 /* Free the memory pointed to by a 'struct token'.  */
 191 static inline void
 192 free_token (struct token *tp)
 193 {
 194   free (tp->chars);
 195 }
 196
 197 /* Ensure there is enough room in the token for one more character.  */
 198 static inline void
 199 grow_token (struct token *tp)
 200 {
 201   if (tp->charcount == tp->allocated)
 202     {
 203       tp->allocated *= 2;
 204       tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
 205     }
 206 }
 207
 208 /* Read the next token.  If 'first' is given, it points to the first
 209    character, which has already been read.  Returns true for a symbol,
 210    false for a number.  */
 211 static bool
 212 read_token (struct token *tp, const int *first)
 213 {
 214   int c;
 215   /* Variables for speculative number parsing:  */
 216   int radix = -1;
 217   int nfirst = 0;
 218   bool exact = true;
 219   bool rational = false;
 220   bool exponent = false;
 221   bool had_sign = false;
 222   bool expecting_prefix = false;
 223
 224   init_token (tp);
 225
 226   if (first)
 227     c = *first;
 228   else
 229     c = do_getc ();
 230
 231   for (;; c = do_getc ())
 232     {
 233       switch (c)
 234         {
 235         case EOF:
 236           goto done;
 237
 238         case ' ': case '\t': case '\n': case '\f': case '\r':
 239         case '(': case ')': case '[': case ']':
 240         case '\'': case '"': case ';': case ',': case '`':
 241           goto done;
 242
 243         case '\\':
 244           radix = 0;
 245           c = do_getc ();
 246           if (c == EOF)
 247             /* Invalid, but be tolerant.  */
 248             break;
 249           grow_token (tp);
 250           tp->chars[tp->charcount++] = c;
 251           break;
 252
 253         case '|':
 254           radix = 0;
 255           for (;;)
 256             {
 257               c = do_getc ();
 258               if (c == EOF || c == '|')
 259                 break;
 260               grow_token (tp);
 261               tp->chars[tp->charcount++] = c;
 262             }
 263           break;
 264
 265         default:
 266           if (radix != 0)
 267             {
 268               if (expecting_prefix)
 269                 {
 270                   switch (c)
 271                     {
 272                     case 'B': case 'b':
 273                       radix = 2;
 274                       break;
 275                     case 'O': case 'o':
 276                       radix = 8;
 277                       break;
 278                     case 'D': case 'd':
 279                       radix = 10;
 280                       break;
 281                     case 'X': case 'x':
 282                       radix = 16;
 283                       break;
 284                     case 'E': case 'e':
 285                     case 'I': case 'i':
 286                       break;
 287                     default:
 288                       radix = 0;
 289                       break;
 290                     }
 291                   expecting_prefix = false;
 292                   nfirst = tp->charcount + 1;
 293                 }
 294               else if (tp->charcount == nfirst
 295                        && (c == '+' || c == '-' || c == '#'))
 296                 {
 297                   if (c == '#')
 298                     {
 299                       if (had_sign)
 300                         radix = 0;
 301                       else
 302                         expecting_prefix = true;
 303                     }
 304                   else
 305                     had_sign = true;
 306                   nfirst = tp->charcount + 1;
 307                 }
 308               else
 309                 {
 310                   switch (radix)
 311                     {
 312                     case -1:
 313                       if (c == '.')
 314                         {
 315                           radix = 10;
 316                           exact = false;
 317                         }
 318                       else if (!(c >= '0' && c <= '9'))
 319                         radix = 0;
 320                       else if (c == '0')
 321                         radix = 1;
 322                       else
 323                         radix = 10;
 324                       break;
 325
 326                     case 1:
 327                       switch (c)
 328                         {
 329                         case 'X': case 'x':
 330                           radix = 16;
 331                           nfirst = tp->charcount + 1;
 332                           break;
 333                         case '0': case '1': case '2': case '3': case '4':
 334                         case '5': case '6': case '7':
 335                           radix = 8;
 336                           nfirst = tp->charcount;
 337                           break;
 338                         case '.': case 'E': case 'e':
 339                           radix = 10;
 340                           exact = false;
 341                           break;
 342                         case '/':
 343                           radix = 10;
 344                           rational = true;
 345                           break;
 346                         default:
 347                           radix = 0;
 348                           break;
 349                         }
 350                       break;
 351
 352                     default:
 353                       switch (c)
 354                         {
 355                         case '.':
 356                           if (exact && radix == 10 && !rational)
 357                             exact = false;
 358                           else
 359                             radix = 0;
 360                           break;
 361                         case '/':
 362                           if (exact && !rational)
 363                             rational = true;
 364                           else
 365                             radix = 0;
 366                           break;
 367                         case 'E': case 'e':
 368                           if (radix == 10)
 369                             {
 370                               if (!rational && !exponent)
 371                                 {
 372                                   exponent = true;
 373                                   exact = false;
 374                                 }
 375                               else
 376                                 radix = 0;
 377                               break;
 378                             }
 379                           /*FALLTHROUGH*/
 380                         default:
 381                           if (exponent && (c == '+' || c == '-'))
 382                             break;
 383                           if ((radix <= 10
 384                                && !(c >= '0' && c <= '0' + radix - 1))
 385                               || (radix == 16 && !isxdigit (c)))
 386                             radix = 0;
 387                           break;
 388                         }
 389                       break;
 390                     }
 391                 }
 392             }
 393           else
 394             {
 395               if (c == '#')
 396                 goto done;
 397             }
 398           grow_token (tp);
 399           tp->chars[tp->charcount++] = c;
 400         }
 401     }
 402  done:
 403   if (c != EOF)
 404     do_ungetc (c);
 405   if (radix > 0 && nfirst < tp->charcount)
 406     return false; /* number */
 407   else
 408     return true; /* symbol */
 409 }
 410
 411
 412 /* ========================= Accumulating comments ========================= */
 413
 414
 415 static char *buffer;
 416 static size_t bufmax;
 417 static size_t buflen;
 418
 419 static inline void
 420 comment_start ()
 421 {
 422   buflen = 0;
 423 }
 424
 425 static inline void
 426 comment_add (int c)
 427 {
 428   if (buflen >= bufmax)
 429     {
 430       bufmax = 2 * bufmax + 10;
 431       buffer = xrealloc (buffer, bufmax);
 432     }
 433   buffer[buflen++] = c;
 434 }
 435
 436 static inline void
 437 comment_line_end (size_t chars_to_remove)
 438 {
 439   buflen -= chars_to_remove;
 440   while (buflen >= 1
 441          && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
 442     --buflen;
 443   if (chars_to_remove == 0 && buflen >= bufmax)
 444     {
 445       bufmax = 2 * bufmax + 10;
 446       buffer = xrealloc (buffer, bufmax);
 447     }
 448   buffer[buflen] = '\0';
 449   xgettext_comment_add (buffer);
 450 }
 451
 452
 453 /* These are for tracking whether comments count as immediately before
 454    keyword.  */
 455 static int last_comment_line;
 456 static int last_non_comment_line;
 457
 458
 459 /* ========================= Accumulating messages ========================= */
 460
 461
 462 static message_list_ty *mlp;
 463
 464
 465 /* ============== Reading of objects.  See CLHS 2 "Syntax".  ============== */
 466
 467
 468 /* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
 469    Other objects need not to be represented precisely.  */
 470 enum object_type
 471 {
 472   t_symbol,     /* symbol */
 473   t_string,     /* string */
 474   t_other,      /* other kind of real object */
 475   t_dot,        /* '.' pseudo object */
 476   t_close,      /* ')' or ']' pseudo object */
 477   t_eof         /* EOF marker */
 478 };
 479
 480 struct object
 481 {
 482   enum object_type type;
 483   struct token *token;          /* for t_symbol and t_string */
 484   int line_number_at_start;     /* for t_string */
 485 };
 486
 487 /* Free the memory pointed to by a 'struct object'.  */
 488 static inline void
 489 free_object (struct object *op)
 490 {
 491   if (op->type == t_symbol || op->type == t_string)
 492     {
 493       free_token (op->token);
 494       free (op->token);
 495     }
 496 }
 497
 498 /* Convert a t_symbol/t_string token to a char*.  */
 499 static char *
 500 string_of_object (const struct object *op)
 501 {
 502   char *str;
 503   int n;
 504
 505   if (!(op->type == t_symbol || op->type == t_string))
 506     abort ();
 507   n = op->token->charcount;
 508   str = (char *) xmalloc (n + 1);
 509   memcpy (str, op->token->chars, n);
 510   str[n] = '\0';
 511   return str;
 512 }
 513
 514 /* Context lookup table.  */
 515 static flag_context_list_table_ty *flag_context_list_table;
 516
 517 /* Returns the character represented by an escape sequence.  */
 518 static int
 519 do_getc_escaped (int c)
 520 {
 521   switch (c)
 522     {
 523     case 'n':
 524       return '\n';
 525     case 'r':
 526       return '\r';
 527     case 'f':
 528       return '\f';
 529     case 't':
 530       return '\t';
 531     case 'v':
 532       return '\v';
 533     case 'a':
 534       return '\a';
 535     case '^':
 536       c = do_getc ();
 537       if (c == EOF)
 538         return EOF;
 539       return c & 0x1f;
 540     case '0': case '1': case '2': case '3': case '4':
 541     case '5': case '6': case '7':
 542       {
 543         int n = c - '0';
 544
 545         c = do_getc ();
 546         if (c != EOF)
 547           {
 548             if (c >= '0' && c <= '7')
 549               {
 550                 n = (n << 3) + (c - '0');
 551                 c = do_getc ();
 552                 if (c != EOF)
 553                   {
 554                     if (c >= '0' && c <= '7')
 555                       n = (n << 3) + (c - '0');
 556                     else
 557                       do_ungetc (c);
 558                   }
 559               }
 560             else
 561               do_ungetc (c);
 562           }
 563         return (unsigned char) n;
 564       }
 565     case 'x':
 566       {
 567         int n = 0;
 568
 569         for (;;)
 570           {
 571             c = do_getc ();
 572             if (c == EOF)
 573               break;
 574             else if (c >= '0' && c <= '9')
 575               n = (n << 4) + (c - '0');
 576             else if (c >= 'A' && c <= 'F')
 577               n = (n << 4) + (c - 'A' + 10);
 578             else if (c >= 'a' && c <= 'f')
 579               n = (n << 4) + (c - 'a' + 10);
 580             else
 581               {
 582                 do_ungetc (c);
 583                 break;
 584               }
 585           }
 586         return (unsigned char) n;
 587       }
 588     default:
 589       return c;
 590     }
 591 }
 592
 593 /* Read the next object.  */
 594 static void
 595 read_object (struct object *op, flag_context_ty outer_context)
 596 {
 597   for (;;)
 598     {
 599       int c;
 600
 601       c = do_getc ();
 602
 603       switch (c)
 604         {
 605         case EOF:
 606           op->type = t_eof;
 607           return;
 608
 609         case '\n':
 610           /* Comments assumed to be grouped with a message must immediately
 611              precede it, with no non-whitespace token on a line between
 612              both.  */
 613           if (last_non_comment_line > last_comment_line)
 614             xgettext_comment_reset ();
 615           continue;
 616
 617         case ' ': case '\t': case '\f': case '\r':
 618           continue;
 619
 620         case '(':
 621           {
 622             int arg = 0;                /* Current argument number.  */
 623             flag_context_list_iterator_ty context_iter;
 624             int argnum1 = 0;            /* First string position.  */
 625             int argnum2 = 0;            /* Plural string position.  */
 626             message_ty *plural_mp = NULL;       /* Remember the msgid.  */
 627
 628             for (;; arg++)
 629               {
 630                 struct object inner;
 631                 flag_context_ty inner_context;
 632
 633                 if (arg == 0)
 634                   inner_context = null_context;
 635                 else
 636                   inner_context =
 637                     inherited_context (outer_context,
 638                                        flag_context_list_iterator_advance (
 639                                          &context_iter));
 640
 641                 read_object (&inner, inner_context);
 642
 643                 /* Recognize end of list.  */
 644                 if (inner.type == t_close)
 645                   {
 646                     op->type = t_other;
 647                     /* Don't bother converting "()" to "NIL".  */
 648                     last_non_comment_line = line_number;
 649                     return;
 650                   }
 651
 652                 /* Dots are not allowed in every position.
 653                    But be tolerant.  */
 654
 655                 /* EOF inside list is illegal.  But be tolerant.  */
 656                 if (inner.type == t_eof)
 657                   break;
 658
 659                 if (arg == 0)
 660                   {
 661                     /* This is the function position.  */
 662                     if (inner.type == t_symbol)
 663                       {
 664                         char *symbol_name = string_of_object (&inner);
 665                         void *keyword_value;
 666
 667                         if (find_entry (&keywords,
 668                                         symbol_name, strlen (symbol_name),
 669                                         &keyword_value)
 670                             == 0)
 671                           {
 672                             argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
 673                             argnum2 = (int) (long) keyword_value >> 10;
 674                           }
 675
 676                         context_iter =
 677                           flag_context_list_iterator (
 678                             flag_context_list_table_lookup (
 679                               flag_context_list_table,
 680                               symbol_name, strlen (symbol_name)));
 681
 682                         free (symbol_name);
 683                       }
 684                     else
 685                       context_iter = null_context_list_iterator;
 686                   }
 687                 else
 688                   {
 689                     /* These are the argument positions.
 690                        Extract a string if we have reached the right
 691                        argument position.  */
 692                     if (arg == argnum1)
 693                       {
 694                         if (inner.type == t_string)
 695                           {
 696                             lex_pos_ty pos;
 697                             message_ty *mp;
 698
 699                             pos.file_name = logical_file_name;
 700                             pos.line_number = inner.line_number_at_start;
 701                             mp = remember_a_message (mlp, string_of_object (&inner),
 702                                                      inner_context, &pos);
 703                             if (argnum2 > 0)
 704                               plural_mp = mp;
 705                           }
 706                       }
 707                     else if (arg == argnum2)
 708                       {
 709                         if (inner.type == t_string && plural_mp != NULL)
 710                           {
 711                             lex_pos_ty pos;
 712
 713                             pos.file_name = logical_file_name;
 714                             pos.line_number = inner.line_number_at_start;
 715                             remember_a_message_plural (plural_mp, string_of_object (&inner),
 716                                                        inner_context, &pos);
 717                           }
 718                       }
 719                   }
 720
 721                 free_object (&inner);
 722               }
 723           }
 724           op->type = t_other;
 725           last_non_comment_line = line_number;
 726           return;
 727
 728         case '[':
 729           {
 730             for (;;)
 731               {
 732                 struct object inner;
 733
 734                 read_object (&inner, null_context);
 735
 736                 /* Recognize end of vector.  */
 737                 if (inner.type == t_close)
 738                   {
 739                     op->type = t_other;
 740                     last_non_comment_line = line_number;
 741                     return;
 742                   }
 743
 744                 /* Dots are not allowed.  But be tolerant.  */
 745
 746                 /* EOF inside vector is illegal.  But be tolerant.  */
 747                 if (inner.type == t_eof)
 748                   break;
 749
 750                 free_object (&inner);
 751               }
 752           }
 753           op->type = t_other;
 754           last_non_comment_line = line_number;
 755           return;
 756
 757         case ')': case ']':
 758           /* Tell the caller about the end of list or vector.
 759              Unmatched closing parenthesis is illegal.  But be tolerant.  */
 760           op->type = t_close;
 761           last_non_comment_line = line_number;
 762           return;
 763
 764         case ',':
 765           {
 766             int c = do_getc ();
 767             /* The ,@ handling inside lists is wrong anyway, because
 768                ,@form expands to an unknown number of elements.  */
 769             if (c != EOF && c != '@')
 770               do_ungetc (c);
 771           }
 772           /*FALLTHROUGH*/
 773         case '\'':
 774         case '`':
 775           {
 776             struct object inner;
 777
 778             read_object (&inner, null_context);
 779
 780             /* Dots and EOF are not allowed here.  But be tolerant.  */
 781
 782             free_object (&inner);
 783
 784             op->type = t_other;
 785             last_non_comment_line = line_number;
 786             return;
 787           }
 788
 789         case ';':
 790           {
 791             bool all_semicolons = true;
 792
 793             last_comment_line = line_number;
 794             comment_start ();
 795             for (;;)
 796               {
 797                 int c = do_getc ();
 798                 if (c == EOF || c == '\n' || c == '\f' || c == '\r')
 799                   break;
 800                 if (c != ';')
 801                   all_semicolons = false;
 802                 if (!all_semicolons)
 803                   {
 804                     /* We skip all leading white space, but not EOLs.  */
 805                     if (!(buflen == 0 && (c == ' ' || c == '\t')))
 806                       comment_add (c);
 807                   }
 808               }
 809             comment_line_end (0);
 810             continue;
 811           }
 812
 813         case '"':
 814           {
 815             op->token = (struct token *) xmalloc (sizeof (struct token));
 816             init_token (op->token);
 817             op->line_number_at_start = line_number;
 818             for (;;)
 819               {
 820                 int c = do_getc ();
 821                 if (c == EOF)
 822                   /* Invalid input.  Be tolerant, no error message.  */
 823                   break;
 824                 if (c == '"')
 825                   break;
 826                 if (c == '\\')
 827                   {
 828                     c = do_getc ();
 829                     if (c == EOF)
 830                       /* Invalid input.  Be tolerant, no error message.  */
 831                       break;
 832                     if (c == '\n')
 833                       /* Ignore escaped newline.  */
 834                       ;
 835                     else
 836                       {
 837                         c = do_getc_escaped (c);
 838                         if (c == EOF)
 839                           /* Invalid input.  Be tolerant, no error message.  */
 840                           break;
 841                         grow_token (op->token);
 842                         op->token->chars[op->token->charcount++] = c;
 843                       }
 844                   }
 845                 else
 846                   {
 847                     grow_token (op->token);
 848                     op->token->chars[op->token->charcount++] = c;
 849                   }
 850               }
 851             op->type = t_string;
 852
 853             if (extract_all)
 854               {
 855                 lex_pos_ty pos;
 856
 857                 pos.file_name = logical_file_name;
 858                 pos.line_number = op->line_number_at_start;
 859                 remember_a_message (mlp, string_of_object (op),
 860                                     null_context, &pos);
 861               }
 862             last_non_comment_line = line_number;
 863             return;
 864           }
 865
 866         case '?':
 867           c = do_getc ();
 868           if (c == EOF)
 869             /* Invalid input.  Be tolerant, no error message.  */
 870             ;
 871           else if (c == '\\')
 872             {
 873               c = do_getc ();
 874               if (c == EOF)
 875                 /* Invalid input.  Be tolerant, no error message.  */
 876                 ;
 877               else
 878                 {
 879                   c = do_getc_escaped (c);
 880                   if (c == EOF)
 881                     /* Invalid input.  Be tolerant, no error message.  */
 882                     ;
 883                 }
 884             }
 885           op->type = t_other;
 886           last_non_comment_line = line_number;
 887           return;
 888
 889         case '#':
 890           /* Dispatch macro handling.  */
 891           c = do_getc ();
 892           if (c == EOF)
 893             /* Invalid input.  Be tolerant, no error message.  */
 894             {
 895               op->type = t_other;
 896               return;
 897             }
 898
 899           switch (c)
 900             {
 901             case '!':
 902               if (ftell (fp) == 2)
 903                 /* Skip comment until !# */
 904                 {
 905                   c = do_getc ();
 906                   for (;;)
 907                     {
 908                       if (c == EOF)
 909                         break;
 910                       if (c == '!')
 911                         {
 912                           c = do_getc ();
 913                           if (c == EOF || c == '#')
 914                             break;
 915                         }
 916                       else
 917                         c = do_getc ();
 918                     }
 919                   if (c == EOF)
 920                     {
 921                       /* EOF not allowed here.  But be tolerant.  */
 922                       op->type = t_eof;
 923                       return;
 924                     }
 925                   continue;
 926                 }
 927               /*FALLTHROUGH*/
 928             case '\'':
 929             case ':':
 930               {
 931                 struct object inner;
 932                 read_object (&inner, null_context);
 933                 /* Dots and EOF are not allowed here.
 934                    But be tolerant.  */
 935                 free_object (&inner);
 936                 op->type = t_other;
 937                 last_non_comment_line = line_number;
 938                 return;
 939               }
 940
 941             case '[':
 942             case '(':
 943               {
 944                 struct object inner;
 945                 do_ungetc (c);
 946                 read_object (&inner, null_context);
 947                 /* Dots and EOF are not allowed here.
 948                    But be tolerant.  */
 949                 free_object (&inner);
 950                 op->type = t_other;
 951                 last_non_comment_line = line_number;
 952                 return;
 953               }
 954
 955             case '|':
 956               {
 957                 int depth = 0;
 958
 959                 comment_start ();
 960                 c = do_getc ();
 961                 for (;;)
 962                   {
 963                     if (c == EOF)
 964                       break;
 965                     if (c == '|')
 966                       {
 967                         c = do_getc ();
 968                         if (c == EOF)
 969                           break;
 970                         if (c == '#')
 971                           {
 972                             if (depth == 0)
 973                               {
 974                                 comment_line_end (0);
 975                                 break;
 976                               }
 977                             depth--;
 978                             comment_add ('|');
 979                             comment_add ('#');
 980                             c = do_getc ();
 981                           }
 982                         else
 983                           comment_add ('|');
 984                       }
 985                     else if (c == '#')
 986                       {
 987                         c = do_getc ();
 988                         if (c == EOF)
 989                           break;
 990                         comment_add ('#');
 991                         if (c == '|')
 992                           {
 993                             depth++;
 994                             comment_add ('|');
 995                             c = do_getc ();
 996                           }
 997                       }
 998                     else
 999                       {
1000                         /* We skip all leading white space.  */
1001                         if (!(buflen == 0 && (c == ' ' || c == '\t')))
1002                           comment_add (c);
1003                         if (c == '\n')
1004                           {
1005                             comment_line_end (1);
1006                             comment_start ();
1007                           }
1008                         c = do_getc ();
1009                       }
1010                   }
1011                 if (c == EOF)
1012                   {
1013                     /* EOF not allowed here.  But be tolerant.  */
1014                     op->type = t_eof;
1015                     return;
1016                   }
1017                 last_comment_line = line_number;
1018                 continue;
1019               }
1020
1021             case '\\':
1022               {
1023                 struct token token;
1024                 int first = '\\';
1025                 read_token (&token, &first);
1026                 free_token (&token);
1027                 op->type = t_other;
1028                 last_non_comment_line = line_number;
1029                 return;
1030               }
1031
1032             case 'T': case 't':
1033             case 'F': case 'f':
1034               op->type = t_other;
1035               last_non_comment_line = line_number;
1036               return;
1037
1038             case 'B': case 'b':
1039             case 'O': case 'o':
1040             case 'D': case 'd':
1041             case 'X': case 'x':
1042             case 'E': case 'e':
1043             case 'I': case 'i':
1044               {
1045                 struct token token;
1046                 do_ungetc (c);
1047                 c = '#';
1048                 read_token (&token, &c);
1049                 free_token (&token);
1050                 op->type = t_other;
1051                 last_non_comment_line = line_number;
1052                 return;
1053               }
1054
1055             default:
1056               /* Invalid input.  Be tolerant, no error message.  */
1057               op->type = t_other;
1058               last_non_comment_line = line_number;
1059               return;
1060             }
1061
1062           /*NOTREACHED*/
1063           abort ();
1064
1065         default:
1066           /* Read a token.  */
1067           {
1068             bool symbol;
1069
1070             op->token = (struct token *) xmalloc (sizeof (struct token));
1071             symbol = read_token (op->token, &c);
1072             if (op->token->charcount == 1 && op->token->chars[0] == '.')
1073               {
1074                 free_token (op->token);
1075                 free (op->token);
1076                 op->type = t_dot;
1077                 last_non_comment_line = line_number;
1078                 return;
1079               }
1080             if (!symbol)
1081               {
1082                 free_token (op->token);
1083                 free (op->token);
1084                 op->type = t_other;
1085                 last_non_comment_line = line_number;
1086                 return;
1087               }
1088             /* Distinguish between "foo" and "foo#bar".  */
1089             c = do_getc ();
1090             if (c == '#')
1091               {
1092                 struct token second_token;
1093
1094                 free_token (op->token);
1095                 free (op->token);
1096                 read_token (&second_token, NULL);
1097                 free_token (&second_token);
1098                 op->type = t_other;
1099                 last_non_comment_line = line_number;
1100                 return;
1101               }
1102             else
1103               {
1104                 if (c != EOF)
1105                   do_ungetc (c);
1106                 op->type = t_symbol;
1107                 last_non_comment_line = line_number;
1108                 return;
1109               }
1110           }
1111         }
1112     }
1113 }
1114
1115
1116 void
1117 extract_librep (FILE *f,
1118                 const char *real_filename, const char *logical_filename,
1119                 flag_context_list_table_ty *flag_table,
1120                 msgdomain_list_ty *mdlp)
1121 {
1122   mlp = mdlp->item[0]->messages;
1123
1124   fp = f;
1125   real_file_name = real_filename;
1126   logical_file_name = xstrdup (logical_filename);
1127   line_number = 1;
1128
1129   last_comment_line = -1;
1130   last_non_comment_line = -1;
1131
1132   flag_context_list_table = flag_table;
1133
1134   init_keywords ();
1135
1136   /* Eat tokens until eof is seen.  When read_object returns
1137      due to an unbalanced closing parenthesis, just restart it.  */
1138   do
1139     {
1140       struct object toplevel_object;
1141
1142       read_object (&toplevel_object, null_context);
1143
1144       if (toplevel_object.type == t_eof)
1145         break;
1146
1147       free_object (&toplevel_object);
1148     }
1149   while (!feof (fp));
1150
1151   /* Close scanner.  */
1152   fp = NULL;
1153   real_file_name = NULL;
1154   logical_file_name = NULL;
1155   line_number = 0;
1156 }