src/ptx.c

   1 /* Permuted index for GNU, with keywords in their context.
   2    Copyright (C) 1990, 1991, 1993, 1998-2003 Free Software Foundation, Inc.
   3    François Pinard <pinard@iro.umontreal.ca>, 1988.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 2, or (at your option)
   8    any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18
  19    François Pinard <pinard@iro.umontreal.ca> */
  20
  21 #include <config.h>
  22
  23 #include <stdio.h>
  24 #include <getopt.h>
  25 #include <sys/types.h>
  26 #include "system.h"
  27 #include "argmatch.h"
  28 #include "diacrit.h"
  29 #include "error.h"
  30 #include "regex.h"
  31
  32 /* The official name of this program (e.g., no `g' prefix).  */
  33 #define PROGRAM_NAME "ptx"
  34
  35 /* Note to translator: Please translate "F. Pinard" to "François
  36    Pinard" if "ç" (c-with-cedilla) is available in the
  37    translation's character set and encoding.  */
  38 #define AUTHORS _("F. Pinard")
  39
  40 /* Number of possible characters in a byte.  */
  41 #define CHAR_SET_SIZE 256
  42
  43 #define ISODIGIT(C) ((C) >= '0' && (C) <= '7')
  44 #define HEXTOBIN(C) ((C) >= 'a' && (C) <= 'f' ? (C)-'a'+10 \
  45                      : (C) >= 'A' && (C) <= 'F' ? (C)-'A'+10 : (C)-'0')
  46 #define OCTTOBIN(C) ((C) - '0')
  47
  48 /* Debugging the memory allocator.  */
  49
  50 #if WITH_DMALLOC
  51 # define MALLOC_FUNC_CHECK 1
  52 # include <dmalloc.h>
  53 #endif
  54 \f
  55 /* Global definitions.  */
  56
  57 /* Reallocation step when swallowing non regular files.  The value is not
  58    the actual reallocation step, but its base two logarithm.  */
  59 #define SWALLOW_REALLOC_LOG 12
  60
  61 /* Imported from "regex.c".  */
  62 #define Sword 1
  63
  64 /* The name this program was run with. */
  65 char *program_name;
  66
  67 /* Program options.  */
  68
  69 enum Format
  70 {
  71   UNKNOWN_FORMAT,               /* output format still unknown */
  72   DUMB_FORMAT,                  /* output for a dumb terminal */
  73   ROFF_FORMAT,                  /* output for `troff' or `nroff' */
  74   TEX_FORMAT                    /* output for `TeX' or `LaTeX' */
  75 };
  76
  77 int gnu_extensions = 1;         /* trigger all GNU extensions */
  78 int auto_reference = 0;         /* references are `file_name:line_number:' */
  79 int input_reference = 0;        /* references at beginning of input lines */
  80 int right_reference = 0;        /* output references after right context  */
  81 int line_width = 72;            /* output line width in characters */
  82 int gap_size = 3;               /* number of spaces between output fields */
  83 const char *truncation_string = "/";
  84                                 /* string used to mark line truncations */
  85 const char *macro_name = "xx";  /* macro name for roff or TeX output */
  86 enum Format output_format = UNKNOWN_FORMAT;
  87                                 /* output format */
  88
  89 int ignore_case = 0;            /* fold lower to upper case for sorting */
  90 const char *context_regex_string = NULL;
  91                                 /* raw regex for end of context */
  92 const char *word_regex_string = NULL;
  93                                 /* raw regex for a keyword */
  94 const char *break_file = NULL;  /* name of the `Break characters' file */
  95 const char *only_file = NULL;   /* name of the `Only words' file */
  96 const char *ignore_file = NULL; /* name of the `Ignore words' file */
  97
  98 /* A BLOCK delimits a region in memory of arbitrary size, like the copy of
  99    a whole file.  A WORD is something smaller; its length should fit in a
 100    short integer.  A WORD_TABLE may contain several WORDs.  */
 101
 102 typedef struct
 103   {
 104     char *start;                /* pointer to beginning of region */
 105     char *end;                  /* pointer to end + 1 of region */
 106   }
 107 BLOCK;
 108
 109 typedef struct
 110   {
 111     char *start;                /* pointer to beginning of region */
 112     short size;                 /* length of the region */
 113   }
 114 WORD;
 115
 116 typedef struct
 117   {
 118     WORD *start;                /* array of WORDs */
 119     size_t alloc;               /* allocated length */
 120     size_t length;              /* number of used entries */
 121   }
 122 WORD_TABLE;
 123
 124 /* Pattern description tables.  */
 125
 126 /* For each character, provide its folded equivalent.  */
 127 unsigned char folded_chars[CHAR_SET_SIZE];
 128
 129 /* For each character, indicate if it is part of a word.  */
 130 char syntax_table[CHAR_SET_SIZE];
 131 char *re_syntax_table = syntax_table;
 132
 133 /* Compiled regex for end of context.  */
 134 struct re_pattern_buffer *context_regex;
 135
 136 /* End of context pattern register indices.  */
 137 struct re_registers context_regs;
 138
 139 /* Compiled regex for a keyword.  */
 140 struct re_pattern_buffer *word_regex;
 141
 142 /* Keyword pattern register indices.  */
 143 struct re_registers word_regs;
 144
 145 /* A word characters fastmap is used only when no word regexp has been
 146    provided.  A word is then made up of a sequence of one or more characters
 147    allowed by the fastmap.  Each entry is nonzero if the corresponding
 148    character is allowed in a word.  Not only is this faster in most cases,
 149    but it also simplifies the implementation of the Break files.  */
 150 char word_fastmap[CHAR_SET_SIZE];
 151
 152 /* Maximum length of any word read.  */
 153 int maximum_word_length;
 154
 155 /* Maximum width of any reference used.  */
 156 int reference_max_width;
 157
 158 /* Ignore and Only word tables.  */
 159
 160 WORD_TABLE ignore_table;        /* table of words to ignore */
 161 WORD_TABLE only_table;          /* table of words to select */
 162
 163 /* Source text table, and scanning macros.  */
 164
 165 int number_input_files;         /* number of text input files */
 166 int total_line_count;           /* total number of lines seen so far */
 167 const char **input_file_name;   /* array of text input file names */
 168 int *file_line_count;           /* array of `total_line_count' values at end */
 169
 170 BLOCK text_buffer;              /* file to study */
 171 char *text_buffer_maxend;       /* allocated end of text_buffer */
 172
 173 /* SKIP_NON_WHITE used only for getting or skipping the reference.  */
 174
 175 #define SKIP_NON_WHITE(cursor, limit) \
 176   while (cursor < limit && !ISSPACE(*cursor))                           \
 177     cursor++
 178
 179 #define SKIP_WHITE(cursor, limit) \
 180   while (cursor < limit && ISSPACE(*cursor))                            \
 181     cursor++
 182
 183 #define SKIP_WHITE_BACKWARDS(cursor, start) \
 184   while (cursor > start && ISSPACE(cursor[-1]))                         \
 185     cursor--
 186
 187 #define SKIP_SOMETHING(cursor, limit) \
 188   if (word_regex_string)                                                \
 189     {                                                                   \
 190       int count;                                                        \
 191       count = re_match (word_regex, cursor, limit - cursor, 0, NULL);   \
 192       cursor += count <= 0 ? 1 : count;                                 \
 193     }                                                                   \
 194   else if (word_fastmap[(unsigned char) *cursor])                       \
 195     while (cursor < limit && word_fastmap[(unsigned char) *cursor])     \
 196       cursor++;                                                         \
 197   else                                                                  \
 198     cursor++
 199
 200 /* Occurrences table.
 201
 202    The `key' field describes the central word, which is surrounded by a
 203    left context and a right context.  Its `start' pointer and `size'
 204    length allow full 8-bit character keys, even including NULs.  At other
 205    places in this program, the name `keyafter' refers to the keyword
 206    followed by its right context.
 207
 208    The left context does not extend, towards the beginning of the file,
 209    further than a distance given by the `left' value.  This value is
 210    relative to the keyword beginning; it is usually negative.  This
 211    ensures that, except for white space, we will never have to scan the
 212    source text backwards when it is time to generate the final output
 213    lines.
 214
 215    The right context, indirectly attainable through the keyword end, does
 216    not extend, towards the end of the file, further than a distance given
 217    by the `right' value.  This value is relative to the keyword
 218    beginning, it is usually positive.
 219
 220    When automatic references are used, the `reference' value is the
 221    overall line number in all input files read so far, in this case, it
 222    is of type (int).  When input references are used, the `reference'
 223    value indicates the distance between the keyword beginning and the
 224    start of the reference field, it is of type (DELTA) and usually
 225    negative.  */
 226
 227 typedef short DELTA;            /* to hold displacement within one context */
 228
 229 typedef struct
 230   {
 231     WORD key;                   /* description of the keyword */
 232     DELTA left;                 /* distance to left context start */
 233     DELTA right;                /* distance to right context end */
 234     int reference;              /* reference descriptor */
 235   }
 236 OCCURS;
 237
 238 /* The various OCCURS tables are indexed by the language.  For the time
 239    being, however, there is no such multiple language support.  */
 240
 241 OCCURS *occurs_table[1];        /* all words retained from the read text */
 242 size_t occurs_alloc[1];         /* allocated size of occurs_table */
 243 size_t number_of_occurs[1];     /* number of used slots in occurs_table */
 244
 245
 246 /* Communication among output routines.  */
 247
 248 /* Indicate if special output processing is requested for each character.  */
 249 char edited_flag[CHAR_SET_SIZE];
 250
 251 int half_line_width;            /* half of line width, reference excluded */
 252 int before_max_width;           /* maximum width of before field */
 253 int keyafter_max_width;         /* maximum width of keyword-and-after field */
 254 int truncation_string_length;   /* length of string used to flag truncation */
 255
 256 /* When context is limited by lines, wraparound may happen on final output:
 257    the `head' pointer gives access to some supplementary left context which
 258    will be seen at the end of the output line, the `tail' pointer gives
 259    access to some supplementary right context which will be seen at the
 260    beginning of the output line. */
 261
 262 BLOCK tail;                     /* tail field */
 263 int tail_truncation;            /* flag truncation after the tail field */
 264
 265 BLOCK before;                   /* before field */
 266 int before_truncation;          /* flag truncation before the before field */
 267
 268 BLOCK keyafter;                 /* keyword-and-after field */
 269 int keyafter_truncation;        /* flag truncation after the keyafter field */
 270
 271 BLOCK head;                     /* head field */
 272 int head_truncation;            /* flag truncation before the head field */
 273
 274 BLOCK reference;                /* reference field for input reference mode */
 275 \f
 276 /* Miscellaneous routines.  */
 277
 278 /*------------------------------------------------------.
 279 | Duplicate string STRING, while evaluating \-escapes.  |
 280 `------------------------------------------------------*/
 281
 282 /* Loosely adapted from GNU sh-utils printf.c code.  */
 283
 284 static char *
 285 copy_unescaped_string (const char *string)
 286 {
 287   char *result;                 /* allocated result */
 288   char *cursor;                 /* cursor in result */
 289   int value;                    /* value of \nnn escape */
 290   int length;                   /* length of \nnn escape */
 291
 292   result = xmalloc (strlen (string) + 1);
 293   cursor = result;
 294
 295   while (*string)
 296     if (*string == '\\')
 297       {
 298         string++;
 299         switch (*string)
 300           {
 301           case 'x':             /* \xhhh escape, 3 chars maximum */
 302             value = 0;
 303             for (length = 0, string++;
 304                  length < 3 && ISXDIGIT (*string);
 305                  length++, string++)
 306               value = value * 16 + HEXTOBIN (*string);
 307             if (length == 0)
 308               {
 309                 *cursor++ = '\\';
 310                 *cursor++ = 'x';
 311               }
 312             else
 313               *cursor++ = value;
 314             break;
 315
 316           case '0':             /* \0ooo escape, 3 chars maximum */
 317             value = 0;
 318             for (length = 0, string++;
 319                  length < 3 && ISODIGIT (*string);
 320                  length++, string++)
 321               value = value * 8 + OCTTOBIN (*string);
 322             *cursor++ = value;
 323             break;
 324
 325           case 'a':             /* alert */
 326 #if __STDC__
 327             *cursor++ = '\a';
 328 #else
 329             *cursor++ = 7;
 330 #endif
 331             string++;
 332             break;
 333
 334           case 'b':             /* backspace */
 335             *cursor++ = '\b';
 336             string++;
 337             break;
 338
 339           case 'c':             /* cancel the rest of the output */
 340             while (*string)
 341               string++;
 342             break;
 343
 344           case 'f':             /* form feed */
 345             *cursor++ = '\f';
 346             string++;
 347             break;
 348
 349           case 'n':             /* new line */
 350             *cursor++ = '\n';
 351             string++;
 352             break;
 353
 354           case 'r':             /* carriage return */
 355             *cursor++ = '\r';
 356             string++;
 357             break;
 358
 359           case 't':             /* horizontal tab */
 360             *cursor++ = '\t';
 361             string++;
 362             break;
 363
 364           case 'v':             /* vertical tab */
 365 #if __STDC__
 366             *cursor++ = '\v';
 367 #else
 368             *cursor++ = 11;
 369 #endif
 370             string++;
 371             break;
 372
 373           default:
 374             *cursor++ = '\\';
 375             *cursor++ = *string++;
 376             break;
 377           }
 378       }
 379     else
 380       *cursor++ = *string++;
 381
 382   *cursor = '\0';
 383   return result;
 384 }
 385
 386 /*-------------------------------------------------------------------.
 387 | Compile the regex represented by STRING, diagnose and abort if any |
 388 | error.  Returns the compiled regex structure.                      |
 389 `-------------------------------------------------------------------*/
 390
 391 static struct re_pattern_buffer *
 392 alloc_and_compile_regex (const char *string)
 393 {
 394   struct re_pattern_buffer *pattern; /* newly allocated structure */
 395   const char *message;          /* error message returned by regex.c */
 396
 397   pattern = xmalloc (sizeof *pattern);
 398   memset (pattern, 0, sizeof (struct re_pattern_buffer));
 399
 400   pattern->buffer = NULL;
 401   pattern->allocated = 0;
 402   pattern->translate = ignore_case ? (char *) folded_chars : NULL;
 403   pattern->fastmap = xmalloc ((size_t) CHAR_SET_SIZE);
 404
 405   message = re_compile_pattern (string, (int) strlen (string), pattern);
 406   if (message)
 407     error (EXIT_FAILURE, 0, _("%s (for regexp `%s')"), message, string);
 408
 409   /* The fastmap should be compiled before `re_match'.  The following
 410      call is not mandatory, because `re_search' is always called sooner,
 411      and it compiles the fastmap if this has not been done yet.  */
 412
 413   re_compile_fastmap (pattern);
 414
 415   /* Do not waste extra allocated space.  */
 416
 417   if (pattern->allocated > pattern->used)
 418     {
 419       pattern->buffer
 420         = xrealloc (pattern->buffer, (size_t) pattern->used);
 421       pattern->allocated = pattern->used;
 422     }
 423
 424   return pattern;
 425 }
 426
 427 /*------------------------------------------------------------------------.
 428 | This will initialize various tables for pattern match and compiles some |
 429 | regexps.                                                                |
 430 `------------------------------------------------------------------------*/
 431
 432 static void
 433 initialize_regex (void)
 434 {
 435   int character;                /* character value */
 436
 437   /* Initialize the regex syntax table.  */
 438
 439   for (character = 0; character < CHAR_SET_SIZE; character++)
 440     syntax_table[character] = ISALPHA (character) ? Sword : 0;
 441
 442   /* Initialize the case folding table.  */
 443
 444   if (ignore_case)
 445     for (character = 0; character < CHAR_SET_SIZE; character++)
 446       folded_chars[character] = TOUPPER (character);
 447
 448   /* Unless the user already provided a description of the end of line or
 449      end of sentence sequence, select an end of line sequence to compile.
 450      If the user provided an empty definition, thus disabling end of line
 451      or sentence feature, make it NULL to speed up tests.  If GNU
 452      extensions are enabled, use end of sentence like in GNU emacs.  If
 453      disabled, use end of lines.  */
 454
 455   if (context_regex_string)
 456     {
 457       if (!*context_regex_string)
 458         context_regex_string = NULL;
 459     }
 460   else if (gnu_extensions && !input_reference)
 461     context_regex_string = "[.?!][]\"')}]*\\($\\|\t\\|  \\)[ \t\n]*";
 462   else
 463     context_regex_string = "\n";
 464
 465   if (context_regex_string)
 466     context_regex = alloc_and_compile_regex (context_regex_string);
 467
 468   /* If the user has already provided a non-empty regexp to describe
 469      words, compile it.  Else, unless this has already been done through
 470      a user provided Break character file, construct a fastmap of
 471      characters that may appear in a word.  If GNU extensions enabled,
 472      include only letters of the underlying character set.  If disabled,
 473      include almost everything, even punctuations; stop only on white
 474      space.  */
 475
 476   if (word_regex_string && *word_regex_string)
 477     word_regex = alloc_and_compile_regex (word_regex_string);
 478   else if (!break_file)
 479     {
 480       if (gnu_extensions)
 481         {
 482
 483           /* Simulate \w+.  */
 484
 485           for (character = 0; character < CHAR_SET_SIZE; character++)
 486             word_fastmap[character] = ISALPHA (character) ? 1 : 0;
 487         }
 488       else
 489         {
 490
 491           /* Simulate [^ \t\n]+.  */
 492
 493           memset (word_fastmap, 1, CHAR_SET_SIZE);
 494           word_fastmap[' '] = 0;
 495           word_fastmap['\t'] = 0;
 496           word_fastmap['\n'] = 0;
 497         }
 498     }
 499 }
 500
 501 /*------------------------------------------------------------------------.
 502 | This routine will attempt to swallow a whole file name FILE_NAME into a |
 503 | contiguous region of memory and return a description of it into BLOCK.  |
 504 | Standard input is assumed whenever FILE_NAME is NULL, empty or "-".     |
 505 |                                                                         |
 506 | Previously, in some cases, white space compression was attempted while  |
 507 | inputting text.  This was defeating some regexps like default end of    |
 508 | sentence, which checks for two consecutive spaces.  If white space      |
 509 | compression is ever reinstated, it should be in output routines.        |
 510 `------------------------------------------------------------------------*/
 511
 512 static void
 513 swallow_file_in_memory (const char *file_name, BLOCK *block)
 514 {
 515   int file_handle;              /* file descriptor number */
 516   struct stat stat_block;       /* stat block for file */
 517   size_t allocated_length;      /* allocated length of memory buffer */
 518   size_t used_length;           /* used length in memory buffer */
 519   int read_length;              /* number of character gotten on last read */
 520
 521   /* As special cases, a file name which is NULL or "-" indicates standard
 522      input, which is already opened.  In all other cases, open the file from
 523      its name.  */
 524   bool using_stdin = !file_name || !*file_name || strcmp (file_name, "-") == 0;
 525   if (using_stdin)
 526     file_handle = STDIN_FILENO;
 527   else
 528     if ((file_handle = open (file_name, O_RDONLY)) < 0)
 529       error (EXIT_FAILURE, errno, "%s", file_name);
 530
 531   /* If the file is a plain, regular file, allocate the memory buffer all at
 532      once and swallow the file in one blow.  In other cases, read the file
 533      repeatedly in smaller chunks until we have it all, reallocating memory
 534      once in a while, as we go.  */
 535
 536   if (fstat (file_handle, &stat_block) < 0)
 537     error (EXIT_FAILURE, errno, "%s", file_name);
 538
 539   if (S_ISREG (stat_block.st_mode))
 540     {
 541       size_t in_memory_size;
 542
 543       block->start = xmalloc ((size_t) stat_block.st_size);
 544
 545       if ((in_memory_size = read (file_handle,
 546                                   block->start, (size_t) stat_block.st_size))
 547           != stat_block.st_size)
 548         {
 549 #if MSDOS
 550           /* On MSDOS, in memory size may be smaller than the file
 551              size, because of end of line conversions.  But it can
 552              never be smaller than half the file size, because the
 553              minimum is when all lines are empty and terminated by
 554              CR+LF.  */
 555           if (in_memory_size != (size_t)-1
 556               && in_memory_size >= stat_block.st_size / 2)
 557             block->start = xrealloc (block->start, in_memory_size);
 558           else
 559 #endif /* not MSDOS */
 560
 561             error (EXIT_FAILURE, errno, "%s", file_name);
 562         }
 563       block->end = block->start + in_memory_size;
 564     }
 565   else
 566     {
 567       block->start = xmalloc ((size_t) 1 << SWALLOW_REALLOC_LOG);
 568       used_length = 0;
 569       allocated_length = (1 << SWALLOW_REALLOC_LOG);
 570
 571       while (read_length = read (file_handle,
 572                                  block->start + used_length,
 573                                  allocated_length - used_length),
 574              read_length > 0)
 575         {
 576           used_length += read_length;
 577           if (used_length == allocated_length)
 578             {
 579               allocated_length += (1 << SWALLOW_REALLOC_LOG);
 580               block->start
 581                 = xrealloc (block->start, allocated_length);
 582             }
 583         }
 584
 585       if (read_length < 0)
 586         error (EXIT_FAILURE, errno, "%s", file_name);
 587
 588       block->end = block->start + used_length;
 589     }
 590
 591   /* Close the file, but only if it was not the standard input.  */
 592
 593   if (! using_stdin && close (file_handle) != 0)
 594     error (EXIT_FAILURE, errno, "%s", file_name);
 595 }
 596 \f
 597 /* Sort and search routines.  */
 598
 599 /*--------------------------------------------------------------------------.
 600 | Compare two words, FIRST and SECOND, and return 0 if they are identical.  |
 601 | Return less than 0 if the first word goes before the second; return       |
 602 | greater than 0 if the first word goes after the second.                   |
 603 |                                                                           |
 604 | If a word is indeed a prefix of the other, the shorter should go first.   |
 605 `--------------------------------------------------------------------------*/
 606
 607 static int
 608 compare_words (const void *void_first, const void *void_second)
 609 {
 610 #define first ((const WORD *) void_first)
 611 #define second ((const WORD *) void_second)
 612   int length;                   /* minimum of two lengths */
 613   int counter;                  /* cursor in words */
 614   int value;                    /* value of comparison */
 615
 616   length = first->size < second->size ? first->size : second->size;
 617
 618   if (ignore_case)
 619     {
 620       for (counter = 0; counter < length; counter++)
 621         {
 622           value = (folded_chars [(unsigned char) (first->start[counter])]
 623                    - folded_chars [(unsigned char) (second->start[counter])]);
 624           if (value != 0)
 625             return value;
 626         }
 627     }
 628   else
 629     {
 630       for (counter = 0; counter < length; counter++)
 631         {
 632           value = ((unsigned char) first->start[counter]
 633                    - (unsigned char) second->start[counter]);
 634           if (value != 0)
 635             return value;
 636         }
 637     }
 638
 639   return first->size - second->size;
 640 #undef first
 641 #undef second
 642 }
 643
 644 /*-----------------------------------------------------------------------.
 645 | Decides which of two OCCURS, FIRST or SECOND, should lexicographically |
 646 | go first.  In case of a tie, preserve the original order through a     |
 647 | pointer comparison.                                                    |
 648 `-----------------------------------------------------------------------*/
 649
 650 static int
 651 compare_occurs (const void *void_first, const void *void_second)
 652 {
 653 #define first ((const OCCURS *) void_first)
 654 #define second ((const OCCURS *) void_second)
 655   int value;
 656
 657   value = compare_words (&first->key, &second->key);
 658   return value == 0 ? first->key.start - second->key.start : value;
 659 #undef first
 660 #undef second
 661 }
 662
 663 /*------------------------------------------------------------.
 664 | Return !0 if WORD appears in TABLE.  Uses a binary search.  |
 665 `------------------------------------------------------------*/
 666
 667 static int
 668 search_table (WORD *word, WORD_TABLE *table)
 669 {
 670   int lowest;                   /* current lowest possible index */
 671   int highest;                  /* current highest possible index */
 672   int middle;                   /* current middle index */
 673   int value;                    /* value from last comparison */
 674
 675   lowest = 0;
 676   highest = table->length - 1;
 677   while (lowest <= highest)
 678     {
 679       middle = (lowest + highest) / 2;
 680       value = compare_words (word, table->start + middle);
 681       if (value < 0)
 682         highest = middle - 1;
 683       else if (value > 0)
 684         lowest = middle + 1;
 685       else
 686         return 1;
 687     }
 688   return 0;
 689 }
 690
 691 /*---------------------------------------------------------------------.
 692 | Sort the whole occurs table in memory.  Presumably, `qsort' does not |
 693 | take intermediate copies or table elements, so the sort will be      |
 694 | stabilized throughout the comparison routine.                        |
 695 `---------------------------------------------------------------------*/
 696
 697 static void
 698 sort_found_occurs (void)
 699 {
 700
 701   /* Only one language for the time being.  */
 702
 703   qsort (occurs_table[0], number_of_occurs[0], sizeof (OCCURS),
 704          compare_occurs);
 705 }
 706 \f
 707 /* Parameter files reading routines.  */
 708
 709 /*----------------------------------------------------------------------.
 710 | Read a file named FILE_NAME, containing a set of break characters.    |
 711 | Build a content to the array word_fastmap in which all characters are |
 712 | allowed except those found in the file.  Characters may be repeated.  |
 713 `----------------------------------------------------------------------*/
 714
 715 static void
 716 digest_break_file (const char *file_name)
 717 {
 718   BLOCK file_contents;          /* to receive a copy of the file */
 719   char *cursor;                 /* cursor in file copy */
 720
 721   swallow_file_in_memory (file_name, &file_contents);
 722
 723   /* Make the fastmap and record the file contents in it.  */
 724
 725   memset (word_fastmap, 1, CHAR_SET_SIZE);
 726   for (cursor = file_contents.start; cursor < file_contents.end; cursor++)
 727     word_fastmap[(unsigned char) *cursor] = 0;
 728
 729   if (!gnu_extensions)
 730     {
 731
 732       /* If GNU extensions are enabled, the only way to avoid newline as
 733          a break character is to write all the break characters in the
 734          file with no newline at all, not even at the end of the file.
 735          If disabled, spaces, tabs and newlines are always considered as
 736          break characters even if not included in the break file.  */
 737
 738       word_fastmap[' '] = 0;
 739       word_fastmap['\t'] = 0;
 740       word_fastmap['\n'] = 0;
 741     }
 742
 743   /* Return the space of the file, which is no more required.  */
 744
 745   free (file_contents.start);
 746 }
 747
 748 /*-----------------------------------------------------------------------.
 749 | Read a file named FILE_NAME, containing one word per line, then        |
 750 | construct in TABLE a table of WORD descriptors for them.  The routine  |
 751 | swallows the whole file in memory; this is at the expense of space     |
 752 | needed for newlines, which are useless; however, the reading is fast.  |
 753 `-----------------------------------------------------------------------*/
 754
 755 static void
 756 digest_word_file (const char *file_name, WORD_TABLE *table)
 757 {
 758   BLOCK file_contents;          /* to receive a copy of the file */
 759   char *cursor;                 /* cursor in file copy */
 760   char *word_start;             /* start of the current word */
 761
 762   swallow_file_in_memory (file_name, &file_contents);
 763
 764   table->start = NULL;
 765   table->alloc = 0;
 766   table->length = 0;
 767
 768   /* Read the whole file.  */
 769
 770   cursor = file_contents.start;
 771   while (cursor < file_contents.end)
 772     {
 773
 774       /* Read one line, and save the word in contains.  */
 775
 776       word_start = cursor;
 777       while (cursor < file_contents.end && *cursor != '\n')
 778         cursor++;
 779
 780       /* Record the word in table if it is not empty.  */
 781
 782       if (cursor > word_start)
 783         {
 784           if (table->length == table->alloc)
 785             {
 786               if ((SIZE_MAX / sizeof *table->start - 1) / 2 < table->alloc)
 787                 xalloc_die ();
 788               table->alloc = table->alloc * 2 + 1;
 789               table->start = xrealloc (table->start,
 790                                        table->alloc * sizeof *table->start);
 791             }
 792
 793           table->start[table->length].start = word_start;
 794           table->start[table->length].size = cursor - word_start;
 795           table->length++;
 796         }
 797
 798       /* This test allows for an incomplete line at end of file.  */
 799
 800       if (cursor < file_contents.end)
 801         cursor++;
 802     }
 803
 804   /* Finally, sort all the words read.  */
 805
 806   qsort (table->start, table->length, (size_t) sizeof (WORD), compare_words);
 807 }
 808 \f
 809 /* Keyword recognition and selection.  */
 810
 811 /*----------------------------------------------------------------------.
 812 | For each keyword in the source text, constructs an OCCURS structure.  |
 813 `----------------------------------------------------------------------*/
 814
 815 static void
 816 find_occurs_in_text (void)
 817 {
 818   char *cursor;                 /* for scanning the source text */
 819   char *scan;                   /* for scanning the source text also */
 820   char *line_start;             /* start of the current input line */
 821   char *line_scan;              /* newlines scanned until this point */
 822   int reference_length;         /* length of reference in input mode */
 823   WORD possible_key;            /* possible key, to ease searches */
 824   OCCURS *occurs_cursor;        /* current OCCURS under construction */
 825
 826   char *context_start;          /* start of left context */
 827   char *context_end;            /* end of right context */
 828   char *word_start;             /* start of word */
 829   char *word_end;               /* end of word */
 830   char *next_context_start;     /* next start of left context */
 831
 832   /* reference_length is always used within `if (input_reference)'.
 833      However, GNU C diagnoses that it may be used uninitialized.  The
 834      following assignment is merely to shut it up.  */
 835
 836   reference_length = 0;
 837
 838   /* Tracking where lines start is helpful for reference processing.  In
 839      auto reference mode, this allows counting lines.  In input reference
 840      mode, this permits finding the beginning of the references.
 841
 842      The first line begins with the file, skip immediately this very first
 843      reference in input reference mode, to help further rejection any word
 844      found inside it.  Also, unconditionally assigning these variable has
 845      the happy effect of shutting up lint.  */
 846
 847   line_start = text_buffer.start;
 848   line_scan = line_start;
 849   if (input_reference)
 850     {
 851       SKIP_NON_WHITE (line_scan, text_buffer.end);
 852       reference_length = line_scan - line_start;
 853       SKIP_WHITE (line_scan, text_buffer.end);
 854     }
 855
 856   /* Process the whole buffer, one line or one sentence at a time.  */
 857
 858   for (cursor = text_buffer.start;
 859        cursor < text_buffer.end;
 860        cursor = next_context_start)
 861     {
 862
 863       /* `context_start' gets initialized before the processing of each
 864          line, or once for the whole buffer if no end of line or sentence
 865          sequence separator.  */
 866
 867       context_start = cursor;
 868
 869       /* If a end of line or end of sentence sequence is defined and
 870          non-empty, `next_context_start' will be recomputed to be the end of
 871          each line or sentence, before each one is processed.  If no such
 872          sequence, then `next_context_start' is set at the end of the whole
 873          buffer, which is then considered to be a single line or sentence.
 874          This test also accounts for the case of an incomplete line or
 875          sentence at the end of the buffer.  */
 876
 877       if (context_regex_string
 878           && (re_search (context_regex, cursor, text_buffer.end - cursor,
 879                          0, text_buffer.end - cursor, &context_regs)
 880               >= 0))
 881         next_context_start = cursor + context_regs.end[0];
 882
 883       else
 884         next_context_start = text_buffer.end;
 885
 886       /* Include the separator into the right context, but not any suffix
 887          white space in this separator; this insures it will be seen in
 888          output and will not take more space than necessary.  */
 889
 890       context_end = next_context_start;
 891       SKIP_WHITE_BACKWARDS (context_end, context_start);
 892
 893       /* Read and process a single input line or sentence, one word at a
 894          time.  */
 895
 896       while (1)
 897         {
 898           if (word_regex)
 899
 900             /* If a word regexp has been compiled, use it to skip at the
 901                beginning of the next word.  If there is no such word, exit
 902                the loop.  */
 903
 904             {
 905               if (re_search (word_regex, cursor, context_end - cursor,
 906                              0, context_end - cursor, &word_regs)
 907                   < 0)
 908                 break;
 909               word_start = cursor + word_regs.start[0];
 910               word_end = cursor + word_regs.end[0];
 911             }
 912           else
 913
 914             /* Avoid re_search and use the fastmap to skip to the
 915                beginning of the next word.  If there is no more word in
 916                the buffer, exit the loop.  */
 917
 918             {
 919               scan = cursor;
 920               while (scan < context_end
 921                      && !word_fastmap[(unsigned char) *scan])
 922                 scan++;
 923
 924               if (scan == context_end)
 925                 break;
 926
 927               word_start = scan;
 928
 929               while (scan < context_end
 930                      && word_fastmap[(unsigned char) *scan])
 931                 scan++;
 932
 933               word_end = scan;
 934             }
 935
 936           /* Skip right to the beginning of the found word.  */
 937
 938           cursor = word_start;
 939
 940           /* Skip any zero length word.  Just advance a single position,
 941              then go fetch the next word.  */
 942
 943           if (word_end == word_start)
 944             {
 945               cursor++;
 946               continue;
 947             }
 948
 949           /* This is a genuine, non empty word, so save it as a possible
 950              key.  Then skip over it.  Also, maintain the maximum length of
 951              all words read so far.  It is mandatory to take the maximum
 952              length of all words in the file, without considering if they
 953              are actually kept or rejected, because backward jumps at output
 954              generation time may fall in *any* word.  */
 955
 956           possible_key.start = cursor;
 957           possible_key.size = word_end - word_start;
 958           cursor += possible_key.size;
 959
 960           if (possible_key.size > maximum_word_length)
 961             maximum_word_length = possible_key.size;
 962
 963           /* In input reference mode, update `line_start' from its previous
 964              value.  Count the lines just in case auto reference mode is
 965              also selected. If it happens that the word just matched is
 966              indeed part of a reference; just ignore it.  */
 967
 968           if (input_reference)
 969             {
 970               while (line_scan < possible_key.start)
 971                 if (*line_scan == '\n')
 972                   {
 973                     total_line_count++;
 974                     line_scan++;
 975                     line_start = line_scan;
 976                     SKIP_NON_WHITE (line_scan, text_buffer.end);
 977                     reference_length = line_scan - line_start;
 978                   }
 979                 else
 980                   line_scan++;
 981               if (line_scan > possible_key.start)
 982                 continue;
 983             }
 984
 985           /* Ignore the word if an `Ignore words' table exists and if it is
 986              part of it.  Also ignore the word if an `Only words' table and
 987              if it is *not* part of it.
 988
 989              It is allowed that both tables be used at once, even if this
 990              may look strange for now.  Just ignore a word that would appear
 991              in both.  If regexps are eventually implemented for these
 992              tables, the Ignore table could then reject words that would
 993              have been previously accepted by the Only table.  */
 994
 995           if (ignore_file && search_table (&possible_key, &ignore_table))
 996             continue;
 997           if (only_file && !search_table (&possible_key, &only_table))
 998             continue;
 999
1000           /* A non-empty word has been found.  First of all, insure
1001              proper allocation of the next OCCURS, and make a pointer to
1002              where it will be constructed.  */
1003
1004           if (number_of_occurs[0] == occurs_alloc[0])
1005             {
1006               if ((SIZE_MAX / sizeof *occurs_table[0] - 1) / 2
1007                   < occurs_alloc[0])
1008                 xalloc_die ();
1009               occurs_alloc[0] = occurs_alloc[0] * 2 + 1;
1010               occurs_table[0] = xrealloc (occurs_table[0],
1011                                           occurs_alloc[0] * sizeof *occurs_table[0]);
1012             }
1013
1014           occurs_cursor = occurs_table[0] + number_of_occurs[0];
1015
1016           /* Define the refence field, if any.  */
1017
1018           if (auto_reference)
1019             {
1020
1021               /* While auto referencing, update `line_start' from its
1022                  previous value, counting lines as we go.  If input
1023                  referencing at the same time, `line_start' has been
1024                  advanced earlier, and the following loop is never really
1025                  executed.  */
1026
1027               while (line_scan < possible_key.start)
1028                 if (*line_scan == '\n')
1029                   {
1030                     total_line_count++;
1031                     line_scan++;
1032                     line_start = line_scan;
1033                     SKIP_NON_WHITE (line_scan, text_buffer.end);
1034                   }
1035                 else
1036                   line_scan++;
1037
1038               occurs_cursor->reference = total_line_count;
1039             }
1040           else if (input_reference)
1041             {
1042
1043               /* If only input referencing, `line_start' has been computed
1044                  earlier to detect the case the word matched would be part
1045                  of the reference.  The reference position is simply the
1046                  value of `line_start'.  */
1047
1048               occurs_cursor->reference
1049                 = (DELTA) (line_start - possible_key.start);
1050               if (reference_length > reference_max_width)
1051                 reference_max_width = reference_length;
1052             }
1053
1054           /* Exclude the reference from the context in simple cases.  */
1055
1056           if (input_reference && line_start == context_start)
1057             {
1058               SKIP_NON_WHITE (context_start, context_end);
1059               SKIP_WHITE (context_start, context_end);
1060             }
1061
1062           /* Completes the OCCURS structure.  */
1063
1064           occurs_cursor->key = possible_key;
1065           occurs_cursor->left = context_start - possible_key.start;
1066           occurs_cursor->right = context_end - possible_key.start;
1067
1068           number_of_occurs[0]++;
1069         }
1070     }
1071 }
1072 \f
1073 /* Formatting and actual output - service routines.  */
1074
/*---------------------------------------------------------.
| Writes NUMBER blanks to stdout; nothing if NUMBER <= 0.  |
`---------------------------------------------------------*/

static void
print_spaces (int number)
{
  int remaining = number;       /* blanks still to be written */

  while (remaining > 0)
    {
      putchar (' ');
      remaining--;
    }
}
1087
1088 /*-------------------------------------.
1089 | Prints the field provided by FIELD.  |
1090 `-------------------------------------*/
1091
1092 static void
1093 print_field (BLOCK field)
1094 {
1095   char *cursor;                 /* Cursor in field to print */
1096   int character;                /* Current character */
1097   int base;                     /* Base character, without diacritic */
1098   int diacritic;                /* Diacritic code for the character */
1099
1100   /* Whitespace is not really compressed.  Instead, each white space
1101      character (tab, vt, ht etc.) is printed as one single space.  */
1102
1103   for (cursor = field.start; cursor < field.end; cursor++)
1104     {
1105       character = (unsigned char) *cursor;
1106       if (edited_flag[character])
1107         {
1108
1109           /* First check if this is a diacriticized character.
1110
1111              This works only for TeX.  I do not know how diacriticized
1112              letters work with `roff'.  Please someone explain it to me!  */
1113
1114           diacritic = todiac (character);
1115           if (diacritic != 0 && output_format == TEX_FORMAT)
1116             {
1117               base = tobase (character);
1118               switch (diacritic)
1119                 {
1120
1121                 case 1:         /* Latin diphthongs */
1122                   switch (base)
1123                     {
1124                     case 'o':
1125                       fputs ("\\oe{}", stdout);
1126                       break;
1127
1128                     case 'O':
1129                       fputs ("\\OE{}", stdout);
1130                       break;
1131
1132                     case 'a':
1133                       fputs ("\\ae{}", stdout);
1134                       break;
1135
1136                     case 'A':
1137                       fputs ("\\AE{}", stdout);
1138                       break;
1139
1140                     default:
1141                       putchar (' ');
1142                     }
1143                   break;
1144
1145                 case 2:         /* Acute accent */
1146                   printf ("\\'%s%c", (base == 'i' ? "\\" : ""), base);
1147                   break;
1148
1149                 case 3:         /* Grave accent */
1150                   printf ("\\`%s%c", (base == 'i' ? "\\" : ""), base);
1151                   break;
1152
1153                 case 4:         /* Circumflex accent */
1154                   printf ("\\^%s%c", (base == 'i' ? "\\" : ""), base);
1155                   break;
1156
1157                 case 5:         /* Diaeresis */
1158                   printf ("\\\"%s%c", (base == 'i' ? "\\" : ""), base);
1159                   break;
1160
1161                 case 6:         /* Tilde accent */
1162                   printf ("\\~%s%c", (base == 'i' ? "\\" : ""), base);
1163                   break;
1164
1165                 case 7:         /* Cedilla */
1166                   printf ("\\c{%c}", base);
1167                   break;
1168
1169                 case 8:         /* Small circle beneath */
1170                   switch (base)
1171                     {
1172                     case 'a':
1173                       fputs ("\\aa{}", stdout);
1174                       break;
1175
1176                     case 'A':
1177                       fputs ("\\AA{}", stdout);
1178                       break;
1179
1180                     default:
1181                       putchar (' ');
1182                     }
1183                   break;
1184
1185                 case 9:         /* Strike through */
1186                   switch (base)
1187                     {
1188                     case 'o':
1189                       fputs ("\\o{}", stdout);
1190                       break;
1191
1192                     case 'O':
1193                       fputs ("\\O{}", stdout);
1194                       break;
1195
1196                     default:
1197                       putchar (' ');
1198                     }
1199                   break;
1200                 }
1201             }
1202           else
1203
1204             /* This is not a diacritic character, so handle cases which are
1205                really specific to `roff' or TeX.  All white space processing
1206                is done as the default case of this switch.  */
1207
1208             switch (character)
1209               {
1210               case '"':
1211                 /* In roff output format, double any quote.  */
1212                 putchar ('"');
1213                 putchar ('"');
1214                 break;
1215
1216               case '$':
1217               case '%':
1218               case '&':
1219               case '#':
1220               case '_':
1221                 /* In TeX output format, precede these with a backslash.  */
1222                 putchar ('\\');
1223                 putchar (character);
1224                 break;
1225
1226               case '{':
1227               case '}':
1228                 /* In TeX output format, precede these with a backslash and
1229                    force mathematical mode.  */
1230                 printf ("$\\%c$", character);
1231                 break;
1232
1233               case '\\':
1234                 /* In TeX output mode, request production of a backslash.  */
1235                 fputs ("\\backslash{}", stdout);
1236                 break;
1237
1238               default:
1239                 /* Any other flagged character produces a single space.  */
1240                 putchar (' ');
1241               }
1242         }
1243       else
1244         putchar (*cursor);
1245     }
1246 }
1247 \f
1248 /* Formatting and actual output - planning routines.  */
1249
1250 /*--------------------------------------------------------------------.
1251 | From information collected from command line options and input file |
1252 | readings, compute and fix some output parameter values.             |
1253 `--------------------------------------------------------------------*/
1254
1255 static void
1256 fix_output_parameters (void)
1257 {
1258   int file_index;               /* index in text input file arrays */
1259   int line_ordinal;             /* line ordinal value for reference */
1260   char ordinal_string[12];      /* edited line ordinal for reference */
1261   int reference_width;          /* width for the whole reference */
1262   int character;                /* character ordinal */
1263   const char *cursor;           /* cursor in some constant strings */
1264
1265   /* In auto reference mode, the maximum width of this field is
1266      precomputed and subtracted from the overall line width.  Add one for
1267      the column which separate the file name from the line number.  */
1268
1269   if (auto_reference)
1270     {
1271       reference_max_width = 0;
1272       for (file_index = 0; file_index < number_input_files; file_index++)
1273         {
1274           line_ordinal = file_line_count[file_index] + 1;
1275           if (file_index > 0)
1276             line_ordinal -= file_line_count[file_index - 1];
1277           sprintf (ordinal_string, "%d", line_ordinal);
1278           reference_width = strlen (ordinal_string);
1279           if (input_file_name[file_index])
1280             reference_width += strlen (input_file_name[file_index]);
1281           if (reference_width > reference_max_width)
1282             reference_max_width = reference_width;
1283         }
1284       reference_max_width++;
1285       reference.start = xmalloc ((size_t) reference_max_width + 1);
1286     }
1287
1288   /* If the reference appears to the left of the output line, reserve some
1289      space for it right away, including one gap size.  */
1290
1291   if ((auto_reference || input_reference) && !right_reference)
1292     line_width -= reference_max_width + gap_size;
1293
1294   /* The output lines, minimally, will contain from left to right a left
1295      context, a gap, and a keyword followed by the right context with no
1296      special intervening gap.  Half of the line width is dedicated to the
1297      left context and the gap, the other half is dedicated to the keyword
1298      and the right context; these values are computed once and for all here.
1299      There also are tail and head wrap around fields, used when the keyword
1300      is near the beginning or the end of the line, or when some long word
1301      cannot fit in, but leave place from wrapped around shorter words.  The
1302      maximum width of these fields are recomputed separately for each line,
1303      on a case by case basis.  It is worth noting that it cannot happen that
1304      both the tail and head fields are used at once.  */
1305
1306   half_line_width = line_width / 2;
1307   before_max_width = half_line_width - gap_size;
1308   keyafter_max_width = half_line_width;
1309
1310   /* If truncation_string is the empty string, make it NULL to speed up
1311      tests.  In this case, truncation_string_length will never get used, so
1312      there is no need to set it.  */
1313
1314   if (truncation_string && *truncation_string)
1315     truncation_string_length = strlen (truncation_string);
1316   else
1317     truncation_string = NULL;
1318
1319   if (gnu_extensions)
1320     {
1321
1322       /* When flagging truncation at the left of the keyword, the
1323          truncation mark goes at the beginning of the before field,
1324          unless there is a head field, in which case the mark goes at the
1325          left of the head field.  When flagging truncation at the right
1326          of the keyword, the mark goes at the end of the keyafter field,
1327          unless there is a tail field, in which case the mark goes at the
1328          end of the tail field.  Only eight combination cases could arise
1329          for truncation marks:
1330
1331          . None.
1332          . One beginning the before field.
1333          . One beginning the head field.
1334          . One ending the keyafter field.
1335          . One ending the tail field.
1336          . One beginning the before field, another ending the keyafter field.
1337          . One ending the tail field, another beginning the before field.
1338          . One ending the keyafter field, another beginning the head field.
1339
1340          So, there is at most two truncation marks, which could appear both
1341          on the left side of the center of the output line, both on the
1342          right side, or one on either side.  */
1343
1344       before_max_width -= 2 * truncation_string_length;
1345       keyafter_max_width -= 2 * truncation_string_length;
1346     }
1347   else
1348     {
1349
1350       /* I never figured out exactly how UNIX' ptx plans the output width
1351          of its various fields.  If GNU extensions are disabled, do not
1352          try computing the field widths correctly; instead, use the
1353          following formula, which does not completely imitate UNIX' ptx,
1354          but almost.  */
1355
1356       keyafter_max_width -= 2 * truncation_string_length + 1;
1357     }
1358
1359   /* Compute which characters need special output processing.  Initialize
1360      by flagging any white space character.  Some systems do not consider
1361      form feed as a space character, but we do.  */
1362
1363   for (character = 0; character < CHAR_SET_SIZE; character++)
1364     edited_flag[character] = ISSPACE (character) != 0;
1365   edited_flag['\f'] = 1;
1366
1367   /* Complete the special character flagging according to selected output
1368      format.  */
1369
1370   switch (output_format)
1371     {
1372     case UNKNOWN_FORMAT:
1373       /* Should never happen.  */
1374
1375     case DUMB_FORMAT:
1376       break;
1377
1378     case ROFF_FORMAT:
1379
1380       /* `Quote' characters should be doubled.  */
1381
1382       edited_flag['"'] = 1;
1383       break;
1384
1385     case TEX_FORMAT:
1386
1387       /* Various characters need special processing.  */
1388
1389       for (cursor = "$%&#_{}\\"; *cursor; cursor++)
1390         edited_flag[(unsigned char) *cursor] = 1;
1391
1392       /* Any character with 8th bit set will print to a single space, unless
1393          it is diacriticized.  */
1394
1395       for (character = 0200; character < CHAR_SET_SIZE; character++)
1396         edited_flag[character] = todiac (character) != 0;
1397       break;
1398     }
1399 }
1400
/*------------------------------------------------------------------.
| Compute the position and length of all the output fields, given a |
| pointer to some OCCURS.                                           |
`------------------------------------------------------------------*/

/* On return, the file-level BLOCKs `keyafter', `before', `tail', `head'
   and (when referencing is active) `reference' are set for OCCURS, as
   well as the four flags keyafter_truncation, before_truncation,
   tail_truncation and head_truncation.  `tail' and `head' are given
   NULL bounds when there is no room for them.

   NOTE(review): the SKIP_SOMETHING, SKIP_WHITE, SKIP_NON_WHITE and
   SKIP_WHITE_BACKWARDS macros are defined elsewhere in this file; the
   comments below presume that each moves its first argument, never
   beyond the limit given as second argument -- confirm against their
   definitions.  */

static void
define_all_fields (OCCURS *occurs)
{
  int tail_max_width;           /* allowable width of tail field */
  int head_max_width;           /* allowable width of head field */
  char *cursor;                 /* running cursor in source text */
  char *left_context_start;     /* start of left context */
  char *right_context_end;      /* end of right context */
  char *left_field_start;       /* conservative start for `head'/`before' */
  int file_index;               /* index in text input file arrays */
  const char *file_name;        /* file name for reference */
  int line_ordinal;             /* line ordinal for reference */

  /* Define `keyafter', start of left context and end of right context.
     `keyafter' starts at the saved position for keyword and extend to the
     right from the end of the keyword, eating separators or full words, but
     not beyond maximum allowed width for `keyafter' field or limit for the
     right context.  Suffix spaces will be removed afterwards.  */

  /* occurs->left and occurs->right are offsets relative to the start of
     the key; they are turned back into pointers here.  */

  keyafter.start = occurs->key.start;
  keyafter.end = keyafter.start + occurs->key.size;
  left_context_start = keyafter.start + occurs->left;
  right_context_end = keyafter.start + occurs->right;

  /* `keyafter.end' trails one step behind `cursor', so that it only
     records positions which were still within the allowed width.  */

  cursor = keyafter.end;
  while (cursor < right_context_end
         && cursor <= keyafter.start + keyafter_max_width)
    {
      keyafter.end = cursor;
      SKIP_SOMETHING (cursor, right_context_end);
    }
  if (cursor <= keyafter.start + keyafter_max_width)
    keyafter.end = cursor;

  keyafter_truncation = truncation_string && keyafter.end < right_context_end;

  SKIP_WHITE_BACKWARDS (keyafter.end, keyafter.start);

  /* When the left context is wide, it might take some time to catch up from
     the left context boundary to the beginning of the `head' or `before'
     fields.  So, in this case, to speed the catchup, we jump back from the
     keyword, using some secure distance, possibly falling in the middle of
     a word.  A secure backward jump would be at least half the maximum
     width of a line, plus the size of the longest word met in the whole
     input.  We conclude this backward jump by a skip forward of at least
     one word.  In this manner, we should not inadvertently accept only part
     of a word.  From the reached point, when it will be time to fix the
     beginning of `head' or `before' fields, we will skip forward words or
     delimiters until we get sufficiently near.  */

  if (-occurs->left > half_line_width + maximum_word_length)
    {
      left_field_start
        = keyafter.start - (half_line_width + maximum_word_length);
      SKIP_SOMETHING (left_field_start, keyafter.start);
    }
  else
    left_field_start = keyafter.start + occurs->left;

  /* `before' certainly ends at the keyword, but not including separating
     spaces.  It starts no earlier than the saved value for the left
     context, which is advanced until the field fits inside the maximum
     allowed width for the before field.  There will be no prefix spaces
     either.  `before' only advances by skipping single separators or whole
     words.  */

  before.start = left_field_start;
  before.end = keyafter.start;
  SKIP_WHITE_BACKWARDS (before.end, before.start);

  while (before.start + before_max_width < before.end)
    SKIP_SOMETHING (before.start, before.end);

  /* `before' is flagged as truncated when, even after backing up over
     the white space preceding it, its start still lies to the right of
     the left context boundary.  */

  if (truncation_string)
    {
      cursor = before.start;
      SKIP_WHITE_BACKWARDS (cursor, text_buffer.start);
      before_truncation = cursor > left_context_start;
    }
  else
    before_truncation = 0;

  SKIP_WHITE (before.start, text_buffer.end);

  /* The tail could not take more columns than what has been left in the
     left context field, and a gap is mandatory.  It starts after the
     right context, and does not contain prefixed spaces.  It ends at
     the end of line, the end of buffer or when the tail field is full,
     whichever comes first.  It cannot contain only part of a word, and
     has no suffixed spaces.  */

  tail_max_width
    = before_max_width - (before.end - before.start) - gap_size;

  if (tail_max_width > 0)
    {
      tail.start = keyafter.end;
      SKIP_WHITE (tail.start, text_buffer.end);

      /* Same trailing scheme as for `keyafter' above, except that the
         width limit is strict here.  */

      tail.end = tail.start;
      cursor = tail.end;
      while (cursor < right_context_end
             && cursor < tail.start + tail_max_width)
        {
          tail.end = cursor;
          SKIP_SOMETHING (cursor, right_context_end);
        }

      if (cursor < tail.start + tail_max_width)
        tail.end = cursor;

      /* A non-empty tail takes over the truncation mark which would
         otherwise end the `keyafter' field.  */

      if (tail.end > tail.start)
        {
          keyafter_truncation = 0;
          tail_truncation = truncation_string && tail.end < right_context_end;
        }
      else
        tail_truncation = 0;

      SKIP_WHITE_BACKWARDS (tail.end, tail.start);
    }
  else
    {

      /* No place left for a tail field.  */

      tail.start = NULL;
      tail.end = NULL;
      tail_truncation = 0;
    }

  /* `head' could not take more columns than what has been left in the right
     context field, and a gap is mandatory.  It ends before the left
     context, and does not contain suffixed spaces.  Its pointer is advanced
     until the head field has shrunk to its allowed width.  It cannot
     contain only part of a word, and has no suffixed spaces.  */

  head_max_width
    = keyafter_max_width - (keyafter.end - keyafter.start) - gap_size;

  if (head_max_width > 0)
    {
      head.end = before.start;
      SKIP_WHITE_BACKWARDS (head.end, text_buffer.start);

      head.start = left_field_start;
      while (head.start + head_max_width < head.end)
        SKIP_SOMETHING (head.start, head.end);

      /* A non-empty head takes over the truncation mark which would
         otherwise begin the `before' field.  */

      if (head.end > head.start)
        {
          before_truncation = 0;
          head_truncation = (truncation_string
                             && head.start > left_context_start);
        }
      else
        head_truncation = 0;

      SKIP_WHITE (head.start, head.end);
    }
  else
    {

      /* No place left for a head field.  */

      head.start = NULL;
      head.end = NULL;
      head_truncation = 0;
    }

  if (auto_reference)
    {

      /* Construct the reference text in preallocated space from the file
         name and the line number.  Find out in which file the reference
         occurred.  Standard input yields an empty file name.  Insure line
         numbers are one based, even if they are computed zero based.  */

      /* NOTE(review): this search has no explicit upper bound on
         file_index; it presumably relies on the last file_line_count
         entry being at least as large as any stored reference --
         confirm.  */

      file_index = 0;
      while (file_line_count[file_index] < occurs->reference)
        file_index++;

      file_name = input_file_name[file_index];
      if (!file_name)
        file_name = "";

      line_ordinal = occurs->reference + 1;
      if (file_index > 0)
        line_ordinal -= file_line_count[file_index - 1];

      /* NOTE(review): unbounded sprintf; presumably reference.start was
         sized beforehand for the longest "FILE:LINE" text -- confirm
         against the code which allocates it.  */

      sprintf (reference.start, "%s:%d", file_name, line_ordinal);
      reference.end = reference.start + strlen (reference.start);
    }
  else if (input_reference)
    {

      /* Reference starts at saved position for reference and extends right
         until some white space is met.  The saved position is an offset
         relative to the start of the keyword.  */

      reference.start = keyafter.start + (DELTA) occurs->reference;
      reference.end = reference.start;
      SKIP_NON_WHITE (reference.end, right_context_end);
    }
}
1609 \f
1610 /* Formatting and actual output - control routines.  */
1611
1612 /*----------------------------------------------------------------------.
1613 | Output the current output fields as one line for `troff' or `nroff'.  |
1614 `----------------------------------------------------------------------*/
1615
1616 static void
1617 output_one_roff_line (void)
1618 {
1619   /* Output the `tail' field.  */
1620
1621   printf (".%s \"", macro_name);
1622   print_field (tail);
1623   if (tail_truncation)
1624     fputs (truncation_string, stdout);
1625   putchar ('"');
1626
1627   /* Output the `before' field.  */
1628
1629   fputs (" \"", stdout);
1630   if (before_truncation)
1631     fputs (truncation_string, stdout);
1632   print_field (before);
1633   putchar ('"');
1634
1635   /* Output the `keyafter' field.  */
1636
1637   fputs (" \"", stdout);
1638   print_field (keyafter);
1639   if (keyafter_truncation)
1640     fputs (truncation_string, stdout);
1641   putchar ('"');
1642
1643   /* Output the `head' field.  */
1644
1645   fputs (" \"", stdout);
1646   if (head_truncation)
1647     fputs (truncation_string, stdout);
1648   print_field (head);
1649   putchar ('"');
1650
1651   /* Conditionally output the `reference' field.  */
1652
1653   if (auto_reference || input_reference)
1654     {
1655       fputs (" \"", stdout);
1656       print_field (reference);
1657       putchar ('"');
1658     }
1659
1660   putchar ('\n');
1661 }
1662
1663 /*---------------------------------------------------------.
1664 | Output the current output fields as one line for `TeX'.  |
1665 `---------------------------------------------------------*/
1666
1667 static void
1668 output_one_tex_line (void)
1669 {
1670   BLOCK key;                    /* key field, isolated */
1671   BLOCK after;                  /* after field, isolated */
1672   char *cursor;                 /* running cursor in source text */
1673
1674   printf ("\\%s ", macro_name);
1675   putchar ('{');
1676   print_field (tail);
1677   fputs ("}{", stdout);
1678   print_field (before);
1679   fputs ("}{", stdout);
1680   key.start = keyafter.start;
1681   after.end = keyafter.end;
1682   cursor = keyafter.start;
1683   SKIP_SOMETHING (cursor, keyafter.end);
1684   key.end = cursor;
1685   after.start = cursor;
1686   print_field (key);
1687   fputs ("}{", stdout);
1688   print_field (after);
1689   fputs ("}{", stdout);
1690   print_field (head);
1691   putchar ('}');
1692   if (auto_reference || input_reference)
1693     {
1694       putchar ('{');
1695       print_field (reference);
1696       putchar ('}');
1697     }
1698   putchar ('\n');
1699 }
1700
1701 /*-------------------------------------------------------------------.
1702 | Output the current output fields as one line for a dumb terminal.  |
1703 `-------------------------------------------------------------------*/
1704
1705 static void
1706 output_one_dumb_line (void)
1707 {
1708   if (!right_reference)
1709     {
1710       if (auto_reference)
1711         {
1712
1713           /* Output the `reference' field, in such a way that GNU emacs
1714              next-error will handle it.  The ending colon is taken from the
1715              gap which follows.  */
1716
1717           print_field (reference);
1718           putchar (':');
1719           print_spaces (reference_max_width
1720                         + gap_size
1721                         - (reference.end - reference.start)
1722                         - 1);
1723         }
1724       else
1725         {
1726
1727           /* Output the `reference' field and its following gap.  */
1728
1729           print_field (reference);
1730           print_spaces (reference_max_width
1731                         + gap_size
1732                         - (reference.end - reference.start));
1733         }
1734     }
1735
1736   if (tail.start < tail.end)
1737     {
1738       /* Output the `tail' field.  */
1739
1740       print_field (tail);
1741       if (tail_truncation)
1742         fputs (truncation_string, stdout);
1743
1744       print_spaces (half_line_width - gap_size
1745                     - (before.end - before.start)
1746                     - (before_truncation ? truncation_string_length : 0)
1747                     - (tail.end - tail.start)
1748                     - (tail_truncation ? truncation_string_length : 0));
1749     }
1750   else
1751     print_spaces (half_line_width - gap_size
1752                   - (before.end - before.start)
1753                   - (before_truncation ? truncation_string_length : 0));
1754
1755   /* Output the `before' field.  */
1756
1757   if (before_truncation)
1758     fputs (truncation_string, stdout);
1759   print_field (before);
1760
1761   print_spaces (gap_size);
1762
1763   /* Output the `keyafter' field.  */
1764
1765   print_field (keyafter);
1766   if (keyafter_truncation)
1767     fputs (truncation_string, stdout);
1768
1769   if (head.start < head.end)
1770     {
1771       /* Output the `head' field.  */
1772
1773       print_spaces (half_line_width
1774                     - (keyafter.end - keyafter.start)
1775                     - (keyafter_truncation ? truncation_string_length : 0)
1776                     - (head.end - head.start)
1777                     - (head_truncation ? truncation_string_length : 0));
1778       if (head_truncation)
1779         fputs (truncation_string, stdout);
1780       print_field (head);
1781     }
1782   else
1783
1784     if ((auto_reference || input_reference) && right_reference)
1785       print_spaces (half_line_width
1786                     - (keyafter.end - keyafter.start)
1787                     - (keyafter_truncation ? truncation_string_length : 0));
1788
1789   if ((auto_reference || input_reference) && right_reference)
1790     {
1791       /* Output the `reference' field.  */
1792
1793       print_spaces (gap_size);
1794       print_field (reference);
1795     }
1796
1797   putchar ('\n');
1798 }
1799
1800 /*------------------------------------------------------------------------.
1801 | Scan the whole occurs table and, for each entry, output one line in the |
1802 | appropriate format.                                                     |
1803 `------------------------------------------------------------------------*/
1804
1805 static void
1806 generate_all_output (void)
1807 {
1808   size_t occurs_index;          /* index of keyword entry being processed */
1809   OCCURS *occurs_cursor;        /* current keyword entry being processed */
1810
1811   /* The following assignments are useful to provide default values in case
1812      line contexts or references are not used, in which case these variables
1813      would never be computed.  */
1814
1815   tail.start = NULL;
1816   tail.end = NULL;
1817   tail_truncation = 0;
1818
1819   head.start = NULL;
1820   head.end = NULL;
1821   head_truncation = 0;
1822
1823   /* Loop over all keyword occurrences.  */
1824
1825   occurs_cursor = occurs_table[0];
1826
1827   for (occurs_index = 0; occurs_index < number_of_occurs[0]; occurs_index++)
1828     {
1829       /* Compute the exact size of every field and whenever truncation flags
1830          are present or not.  */
1831
1832       define_all_fields (occurs_cursor);
1833
1834       /* Produce one output line according to selected format.  */
1835
1836       switch (output_format)
1837         {
1838         case UNKNOWN_FORMAT:
1839           /* Should never happen.  */
1840
1841         case DUMB_FORMAT:
1842           output_one_dumb_line ();
1843           break;
1844
1845         case ROFF_FORMAT:
1846           output_one_roff_line ();
1847           break;
1848
1849         case TEX_FORMAT:
1850           output_one_tex_line ();
1851           break;
1852         }
1853
1854       /* Advance the cursor into the occurs table.  */
1855
1856       occurs_cursor++;
1857     }
1858 }
1859 \f
1860 /* Option decoding and main program.  */
1861
1862 /*------------------------------------------------------.
1863 | Print program identification and options, then exit.  |
1864 `------------------------------------------------------*/
1865
1866 void
1867 usage (int status)
1868 {
1869   if (status != EXIT_SUCCESS)
1870     fprintf (stderr, _("Try `%s --help' for more information.\n"),
1871              program_name);
1872   else
1873     {
1874       printf (_("\
1875 Usage: %s [OPTION]... [INPUT]...   (without -G)\n\
1876   or:  %s -G [OPTION]... [INPUT [OUTPUT]]\n"),
1877               program_name, program_name);
1878       fputs (_("\
1879 Output a permuted index, including context, of the words in the input files.\n\
1880 \n\
1881 "), stdout);
1882       fputs (_("\
1883 Mandatory arguments to long options are mandatory for short options too.\n\
1884 "), stdout);
1885       fputs (_("\
1886   -A, --auto-reference           output automatically generated references\n\
1887   -C, --copyright                display Copyright and copying conditions\n\
1888   -G, --traditional              behave more like System V `ptx'\n\
1889   -F, --flag-truncation=STRING   use STRING for flagging line truncations\n\
1890 "), stdout);
1891       fputs (_("\
1892   -M, --macro-name=STRING        macro name to use instead of `xx'\n\
1893   -O, --format=roff              generate output as roff directives\n\
1894   -R, --right-side-refs          put references at right, not counted in -w\n\
1895   -S, --sentence-regexp=REGEXP   for end of lines or end of sentences\n\
1896   -T, --format=tex               generate output as TeX directives\n\
1897 "), stdout);
1898       fputs (_("\
1899   -W, --word-regexp=REGEXP       use REGEXP to match each keyword\n\
1900   -b, --break-file=FILE          word break characters in this FILE\n\
1901   -f, --ignore-case              fold lower case to upper case for sorting\n\
1902   -g, --gap-size=NUMBER          gap size in columns between output fields\n\
1903   -i, --ignore-file=FILE         read ignore word list from FILE\n\
1904   -o, --only-file=FILE           read only word list from this FILE\n\
1905 "), stdout);
1906       fputs (_("\
1907   -r, --references               first field of each line is a reference\n\
1908   -t, --typeset-mode               - not implemented -\n\
1909   -w, --width=NUMBER             output width in columns, reference excluded\n\
1910 "), stdout);
1911       fputs (HELP_OPTION_DESCRIPTION, stdout);
1912       fputs (VERSION_OPTION_DESCRIPTION, stdout);
1913       fputs (_("\
1914 \n\
1915 With no FILE or if FILE is -, read Standard Input.  `-F /' by default.\n\
1916 "), stdout);
1917       printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
1918     }
1919   exit (status);
1920 }
1921
1922 /*----------------------------------------------------------------------.
1923 | Main program.  Decode ARGC arguments passed through the ARGV array of |
1924 | strings, then launch execution.                                       |
1925 `----------------------------------------------------------------------*/
1926
1927 /* Long options equivalences.  */
1928 static const struct option long_options[] =
1929 {
1930   {"auto-reference", no_argument, NULL, 'A'},
1931   {"break-file", required_argument, NULL, 'b'},
1932   {"copyright", no_argument, NULL, 'C'},
1933   {"flag-truncation", required_argument, NULL, 'F'},
1934   {"ignore-case", no_argument, NULL, 'f'},
1935   {"gap-size", required_argument, NULL, 'g'},
1936   {"ignore-file", required_argument, NULL, 'i'},
1937   {"macro-name", required_argument, NULL, 'M'},
1938   {"only-file", required_argument, NULL, 'o'},
1939   {"references", no_argument, NULL, 'r'},
1940   {"right-side-refs", no_argument, NULL, 'R'},
1941   {"format", required_argument, NULL, 10},
1942   {"sentence-regexp", required_argument, NULL, 'S'},
1943   {"traditional", no_argument, NULL, 'G'},
1944   {"typeset-mode", no_argument, NULL, 't'},
1945   {"width", required_argument, NULL, 'w'},
1946   {"word-regexp", required_argument, NULL, 'W'},
1947   {GETOPT_HELP_OPTION_DECL},
1948   {GETOPT_VERSION_OPTION_DECL},
1949   {0, 0, 0, 0},
1950 };
1951
1952 static char const* const format_args[] =
1953 {
1954   "roff", "tex", 0
1955 };
1956
1957 static enum Format const format_vals[] =
1958 {
1959   ROFF_FORMAT, TEX_FORMAT
1960 };
1961
1962 int
1963 main (int argc, char **argv)
1964 {
1965   int optchar;                  /* argument character */
1966   int file_index;               /* index in text input file arrays */
1967
1968   /* Decode program options.  */
1969
1970   initialize_main (&argc, &argv);
1971   program_name = argv[0];
1972   setlocale (LC_ALL, "");
1973   bindtextdomain (PACKAGE, LOCALEDIR);
1974   textdomain (PACKAGE);
1975
1976   atexit (close_stdout);
1977
1978 #if HAVE_SETCHRCLASS
1979   setchrclass (NULL);
1980 #endif
1981
1982   while (optchar = getopt_long (argc, argv, "ACF:GM:ORS:TW:b:i:fg:o:trw:",
1983                                 long_options, NULL),
1984          optchar != EOF)
1985     {
1986       switch (optchar)
1987         {
1988         default:
1989           usage (EXIT_FAILURE);
1990
1991         case 0:
1992           break;
1993
1994         case 'C':
1995           fputs (_("\
1996 This program is free software; you can redistribute it and/or modify\n\
1997 it under the terms of the GNU General Public License as published by\n\
1998 the Free Software Foundation; either version 2, or (at your option)\n\
1999 any later version.\n\
2000 \n\
2001 "), stdout);
2002           fputs (_("\
2003 This program is distributed in the hope that it will be useful,\n\
2004 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
2005 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n\
2006 GNU General Public License for more details.\n\
2007 \n\
2008 "), stdout);
2009           fputs (_("\
2010 You should have received a copy of the GNU General Public License\n\
2011 along with this program; if not, write to the Free Software Foundation,\n\
2012 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n"),
2013                  stdout);
2014
2015           exit (EXIT_SUCCESS);
2016
2017         case 'G':
2018           gnu_extensions = 0;
2019           break;
2020
2021         case 'b':
2022           break_file = optarg;
2023           break;
2024
2025         case 'f':
2026           ignore_case = 1;
2027           break;
2028
2029         case 'g':
2030           gap_size = atoi (optarg);
2031           break;
2032
2033         case 'i':
2034           ignore_file = optarg;
2035           break;
2036
2037         case 'o':
2038           only_file = optarg;
2039           break;
2040
2041         case 'r':
2042           input_reference = 1;
2043           break;
2044
2045         case 't':
2046           /* Yet to understand...  */
2047           break;
2048
2049         case 'w':
2050           line_width = atoi (optarg);
2051           break;
2052
2053         case 'A':
2054           auto_reference = 1;
2055           break;
2056
2057         case 'F':
2058           truncation_string = copy_unescaped_string (optarg);
2059           break;
2060
2061         case 'M':
2062           macro_name = optarg;
2063           break;
2064
2065         case 'O':
2066           output_format = ROFF_FORMAT;
2067           break;
2068
2069         case 'R':
2070           right_reference = 1;
2071           break;
2072
2073         case 'S':
2074           context_regex_string = copy_unescaped_string (optarg);
2075           break;
2076
2077         case 'T':
2078           output_format = TEX_FORMAT;
2079           break;
2080
2081         case 'W':
2082           word_regex_string = copy_unescaped_string (optarg);
2083           break;
2084
2085         case 10:
2086           output_format = XARGMATCH ("--format", optarg,
2087                                      format_args, format_vals);
2088         case_GETOPT_HELP_CHAR;
2089
2090         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2091         }
2092     }
2093
2094   /* Change the default Ignore file if one is defined.  */
2095
2096 #ifdef DEFAULT_IGNORE_FILE
2097   if (!ignore_file)
2098     ignore_file = DEFAULT_IGNORE_FILE;
2099 #endif
2100
2101   /* Process remaining arguments.  If GNU extensions are enabled, process
2102      all arguments as input parameters.  If disabled, accept at most two
2103      arguments, the second of which is an output parameter.  */
2104
2105   if (optind == argc)
2106     {
2107
2108       /* No more argument simply means: read standard input.  */
2109
2110       input_file_name = xmalloc (sizeof *input_file_name);
2111       file_line_count = xmalloc (sizeof *file_line_count);
2112       number_input_files = 1;
2113       input_file_name[0] = NULL;
2114     }
2115   else if (gnu_extensions)
2116     {
2117       number_input_files = argc - optind;
2118       input_file_name = xmalloc (number_input_files * sizeof *input_file_name);
2119       file_line_count = xmalloc (number_input_files * sizeof *file_line_count);
2120
2121       for (file_index = 0; file_index < number_input_files; file_index++)
2122         {
2123           input_file_name[file_index] = argv[optind];
2124           if (!*argv[optind] || strcmp (argv[optind], "-") == 0)
2125             input_file_name[0] = NULL;
2126           else
2127             input_file_name[0] = argv[optind];
2128           optind++;
2129         }
2130     }
2131   else
2132     {
2133
2134       /* There is one necessary input file.  */
2135
2136       number_input_files = 1;
2137       input_file_name = xmalloc (sizeof *input_file_name);
2138       file_line_count = xmalloc (sizeof *file_line_count);
2139       if (!*argv[optind] || strcmp (argv[optind], "-") == 0)
2140         input_file_name[0] = NULL;
2141       else
2142         input_file_name[0] = argv[optind];
2143       optind++;
2144
2145       /* Redirect standard output, only if requested.  */
2146
2147       if (optind < argc)
2148         {
2149           /* FIXME: don't fclose here? */
2150           fclose (stdout);
2151           if (fopen (argv[optind], "w") == NULL)
2152             error (EXIT_FAILURE, errno, "%s", argv[optind]);
2153           optind++;
2154         }
2155
2156       /* Diagnose any other argument as an error.  */
2157
2158       if (optind < argc)
2159         usage (EXIT_FAILURE);
2160     }
2161
2162   /* If the output format has not been explicitly selected, choose dumb
2163      terminal format if GNU extensions are enabled, else `roff' format.  */
2164
2165   if (output_format == UNKNOWN_FORMAT)
2166     output_format = gnu_extensions ? DUMB_FORMAT : ROFF_FORMAT;
2167
2168   /* Initialize the main tables.  */
2169
2170   initialize_regex ();
2171
2172   /* Read `Break character' file, if any.  */
2173
2174   if (break_file)
2175     digest_break_file (break_file);
2176
2177   /* Read `Ignore words' file and `Only words' files, if any.  If any of
2178      these files is empty, reset the name of the file to NULL, to avoid
2179      unnecessary calls to search_table. */
2180
2181   if (ignore_file)
2182     {
2183       digest_word_file (ignore_file, &ignore_table);
2184       if (ignore_table.length == 0)
2185         ignore_file = NULL;
2186     }
2187
2188   if (only_file)
2189     {
2190       digest_word_file (only_file, &only_table);
2191       if (only_table.length == 0)
2192         only_file = NULL;
2193     }
2194
2195   /* Prepare to study all the input files.  */
2196
2197   number_of_occurs[0] = 0;
2198   total_line_count = 0;
2199   maximum_word_length = 0;
2200   reference_max_width = 0;
2201
2202   for (file_index = 0; file_index < number_input_files; file_index++)
2203     {
2204
2205       /* Read the file in core, than study it.  */
2206
2207       swallow_file_in_memory (input_file_name[file_index], &text_buffer);
2208       find_occurs_in_text ();
2209
2210       /* Maintain for each file how many lines has been read so far when its
2211          end is reached.  Incrementing the count first is a simple kludge to
2212          handle a possible incomplete line at end of file.  */
2213
2214       total_line_count++;
2215       file_line_count[file_index] = total_line_count;
2216     }
2217
2218   /* Do the output process phase.  */
2219
2220   sort_found_occurs ();
2221   fix_output_parameters ();
2222   generate_all_output ();
2223
2224   /* All done.  */
2225
2226   exit (EXIT_SUCCESS);
2227 }