src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 88, 91-97, 98 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software
  16    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17
  18    Written December 1988 by Mike Haertel.
  19    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  20    or (US mail) as Mike Haertel c/o Free Software Foundation.
  21
  22    Ørn E. Hansen added NLS support in 1997.  */
  23
  24 #include <config.h>
  25
  26 #include <sys/types.h>
  27 #include <signal.h>
  28 #include <stdio.h>
  29 #include <assert.h>
  30 #include "system.h"
  31 #include "long-options.h"
  32 #include "error.h"
  33 #include "xstrtod.h"
  34 #include "xalloc.h"
  35
  36 #if defined ENABLE_NLS && HAVE_LANGINFO_H
  37 # include <langinfo.h>
  38 #endif
  39
  40 char *xstrdup ();
  41
  42 /* Undefine, to avoid warning about redefinition on some systems.  */
  43 #undef min
  44 #define min(a, b) ((a) < (b) ? (a) : (b))
  45 #undef max
  46 #define max(a, b) ((a) > (b) ? (a) : (b))
  47
  48 #define UCHAR_LIM (UCHAR_MAX + 1)
  49 #define UCHAR(c) ((unsigned char) (c))
  50
  51 #ifndef DEFAULT_TMPDIR
  52 # define DEFAULT_TMPDIR "/tmp"
  53 #endif
  54
  55 /* Use this as exit status in case of error, not EXIT_FAILURE.  This
  56    is necessary because EXIT_FAILURE is usually 1 and POSIX requires
  57    that sort exit with status 1 IFF invoked with -c and the input is
  58    not properly sorted.  Any other irregular exit must exit with a
  59    status code greater than 1.  */
  60 #define SORT_FAILURE 2
  61
  62 #define FLOATING_POINT  '.'
  63 #define FLOATING_COMMA  ','
  64 #define NEGATION_SIGN   '-'
  65 #define NUMERIC_ZERO    '0'
  66
  67 #ifdef ENABLE_NLS
  68 # define NLS_MEMCMP(S1, S2, Len) strncoll (S1, S2, Len)
  69 # define NLS_STRNCMP(S1, S2, Len) strncoll_s2_readonly (S1, S2, Len)
  70 #else
  71 # define NLS_MEMCMP(S1, S2, Len) memcmp (S1, S2, Len)
  72 # define NLS_STRNCMP(S1, S2, Len) strncmp (S1, S2, Len)
  73 #endif
  74
  75 #ifdef ENABLE_NLS
  76
  77 static unsigned char decimal_point;
  78 static unsigned char th_sep;
  79 static char *nls_grouping;
  80
  81 /* This is "C" locale, need another? */
  82 static int need_locale = 0;
  83
  84 /* Should we look for decimal point? */
  85 static int nls_fraction_found = 1;
  86
  87 /* Look for month notations in text? */
  88 static int nls_month_found = 1;
  89
  90 #else
  91
  92 # define decimal_point FLOATING_POINT
  93
  94 #endif
  95
  96 /* If native language support is requested, make a 1-1 map to the
  97    locale character map, otherwise ensure normal behavior.  */
  98 #ifdef ENABLE_NLS
  99
 100 /* 12 months in a year */
 101 # define NLS_NUM_MONTHS 12
 102
 103 /* Maximum number of elements, to allocate per allocation unit  */
 104 # define NLS_MAX_GROUPS 8
 105
 106 /* A string with one character, to enforce char collation */
 107 # define NLS_ONE_CHARACTER_STRING " "
 108
 109 #endif
 110
 111 /* The kind of blanks for '-b' to skip in various options. */
 112 enum blanktype { bl_start, bl_end, bl_both };
 113
 114 /* The character marking end of line. Default to \n. */
 115 int eolchar = '\n';
 116
 117 /* Lines are held in core as counted strings. */
 118 struct line
 119 {
 120   char *text;                   /* Text of the line. */
 121   int length;                   /* Length not including final newline. */
 122   char *keybeg;                 /* Start of first key. */
 123   char *keylim;                 /* Limit of first key. */
 124 };
 125
 126 /* Arrays of lines. */
 127 struct lines
 128 {
 129   struct line *lines;           /* Dynamically allocated array of lines. */
 130   int used;                     /* Number of slots used. */
 131   int alloc;                    /* Number of slots allocated. */
 132   int limit;                    /* Max number of slots to allocate.  */
 133 };
 134
 135 /* Input buffers. */
 136 struct buffer
 137 {
 138   char *buf;                    /* Dynamically allocated buffer. */
 139   int used;                     /* Number of bytes used. */
 140   int alloc;                    /* Number of bytes allocated. */
 141   int left;                     /* Number of bytes left after line parsing. */
 142 };
 143
 144 struct keyfield
 145 {
 146   int sword;                    /* Zero-origin 'word' to start at. */
 147   int schar;                    /* Additional characters to skip. */
 148   int skipsblanks;              /* Skip leading white space at start. */
 149   int eword;                    /* Zero-origin first word after field. */
 150   int echar;                    /* Additional characters in field. */
 151   int skipeblanks;              /* Skip trailing white space at finish. */
 152   int *ignore;                  /* Boolean array of characters to ignore. */
 153   char *translate;              /* Translation applied to characters. */
 154   int numeric;                  /* Flag for numeric comparison.  Handle
 155                                    strings of digits with optional decimal
 156                                    point, but no exponential notation. */
 157   int general_numeric;          /* Flag for general, numeric comparison.
 158                                    Handle numbers in exponential notation. */
 159   int month;                    /* Flag for comparison by month name. */
 160   int reverse;                  /* Reverse the sense of comparison. */
 161   struct keyfield *next;        /* Next keyfield to try. */
 162 };
 163
 164 struct month
 165 {
 166   char *name;
 167   int val;
 168 };
 169
 170 /* The name this program was run with. */
 171 char *program_name;
 172
 173 /* Table of white space. */
 174 static int blanks[UCHAR_LIM];
 175
 176 /* Table of non-printing characters. */
 177 static int nonprinting[UCHAR_LIM];
 178
 179 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 180 static int nondictionary[UCHAR_LIM];
 181
 182 /* Translation table folding lower case to upper. */
 183 static char fold_toupper[UCHAR_LIM];
 184
 185 /* Table mapping 3-letter month names to integers.
 186    Alphabetic order allows binary search. */
 187 static const struct month us_monthtab[] =
 188 {
 189   {"APR", 4},
 190   {"AUG", 8},
 191   {"DEC", 12},
 192   {"FEB", 2},
 193   {"JAN", 1},
 194   {"JUL", 7},
 195   {"JUN", 6},
 196   {"MAR", 3},
 197   {"MAY", 5},
 198   {"NOV", 11},
 199   {"OCT", 10},
 200   {"SEP", 9}
 201 };
 202
 203 #ifdef ENABLE_NLS
 204
 205 /* Locale may have a different idea of month names   */
 206 static struct month nls_monthtab[NLS_NUM_MONTHS];
 207 static int nls_months_collide[NLS_NUM_MONTHS + 1];
 208
 209 /* Numeric keys, to search for numeric format */
 210 struct nls_keyfield
 211 {
 212   struct keyfield *key;
 213   struct nls_keyfield *next;
 214 };
 215
 216 static struct nls_keyfield *nls_keyhead = NULL;
 217
 218 #endif
 219
 220 /* Which month table to use in the program, default C */
 221 static const struct month *monthtab = us_monthtab;
 222
 223 /* During the merge phase, the number of files to merge at once. */
 224 #define NMERGE 16
 225
 226 /* Initial buffer size for in core sorting.  Will not grow unless a
 227    line longer than this is seen. */
 228 static int sortalloc = 512 * 1024;
 229
 230 /* Initial buffer size for in core merge buffers.  Bear in mind that
 231    up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
 232 static int mergealloc = 16 * 1024;
 233
 234 /* Guess of average line length. */
 235 static int linelength = 30;
 236
 237 /* Maximum number of elements for the array(s) of struct line's, in bytes.  */
 238 #define LINEALLOC (256 * 1024)
 239
 240 /* Prefix for temporary file names. */
 241 static char *temp_file_prefix;
 242
 243 /* Flag to reverse the order of all comparisons. */
 244 static int reverse;
 245
 246 /* Flag for stable sort.  This turns off the last ditch bytewise
 247    comparison of lines, and instead leaves lines in the same order
 248    they were read if all keys compare equal.  */
 249 static int stable;
 250
 251 /* Tab character separating fields.  If NUL, then fields are separated
 252    by the empty string between a non-whitespace character and a whitespace
 253    character. */
 254 static char tab;
 255
 256 /* Flag to remove consecutive duplicate lines from the output.
 257    Only the last of a sequence of equal lines will be output. */
 258 static int unique;
 259
 260 /* Nonzero if any of the input files are the standard input. */
 261 static int have_read_stdin;
 262
 263 /* Lists of key field comparisons to be tried. */
 264 static struct keyfield keyhead;
 265
 266 static void
 267 usage (int status)
 268 {
 269   if (status != 0)
 270     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 271              program_name);
 272   else
 273     {
 274       printf (_("\
 275 Usage: %s [OPTION]... [FILE]...\n\
 276 "),
 277               program_name);
 278       printf (_("\
 279 Write sorted concatenation of all FILE(s) to standard output.\n\
 280 \n\
 281   +POS1 [-POS2]    start a key at POS1, end it before POS2\n\
 282   -b               ignore leading blanks in sort fields or keys\n\
 283   -c               check if given files already sorted, do not sort\n\
 284   -d               consider only [a-zA-Z0-9 ] characters in keys\n\
 285   -f               fold lower case to upper case characters in keys\n\
 286   -g               compare according to general numerical value, imply -b\n\
 287   -i               consider only [\\040-\\0176] characters in keys\n\
 288   -k POS1[,POS2]   same as +POS1 [-POS2], but all positions counted from 1\n\
 289   -m               merge already sorted files, do not sort\n\
 290   -M               compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
 291   -n               compare according to string numerical value, imply -b\n\
 292   -o FILE          write result on FILE instead of standard output\n\
 293   -r               reverse the result of comparisons\n\
 294   -s               stabilize sort by disabling last resort comparison\n\
 295   -t SEP           use SEParator instead of non- to whitespace transition\n\
 296   -T DIRECT        use DIRECT for temporary files, not $TMPDIR or %s\n\
 297   -u               with -c, check for strict ordering;\n\
 298                    with -m, only output the first of an equal sequence\n\
 299   -z               end lines with 0 byte, not newline, for find -print0\n\
 300       --help       display this help and exit\n\
 301       --version    output version information and exit\n\
 302 \n\
 303 POS is F[.C][OPTS], where F is the field number and C the character\n\
 304 position in the field, both counted from zero.  OPTS is made up of one\n\
 305 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
 306 for that key.  If no key given, use the entire line as key.  With no\n\
 307 FILE, or when FILE is -, read standard input.\n\
 308 ")
 309               , DEFAULT_TMPDIR);
 310       puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
 311     }
 312   /* Don't use EXIT_FAILURE here in case it is defined to be 1.
 313      POSIX requires that sort return 1 IFF invoked with -c and
 314      the input is not properly sorted.  */
 315   assert (status == 0 || status == SORT_FAILURE);
 316   exit (status);
 317 }
 318
 319 /* The list of temporary files. */
 320 static struct tempnode
 321 {
 322   char *name;
 323   struct tempnode *next;
 324 } temphead;
 325
 326 /* Clean up any remaining temporary files. */
 327
 328 static void
 329 cleanup (void)
 330 {
 331   struct tempnode *node;
 332
 333   for (node = temphead.next; node; node = node->next)
 334     unlink (node->name);
 335 }
 336
 337 static FILE *
 338 xtmpfopen (const char *file)
 339 {
 340   FILE *fp;
 341   int fd;
 342
 343   fd = open (file, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0600);
 344   if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
 345     {
 346       error (0, errno, "%s", file);
 347       cleanup ();
 348       exit (SORT_FAILURE);
 349     }
 350
 351   return fp;
 352 }
 353
 354 static FILE *
 355 xfopen (const char *file, const char *how)
 356 {
 357   FILE *fp;
 358
 359   if (STREQ (file, "-"))
 360     {
 361       fp = stdin;
 362     }
 363   else
 364     {
 365       if ((fp = fopen (file, how)) == NULL)
 366         {
 367           error (0, errno, "%s", file);
 368           cleanup ();
 369           exit (SORT_FAILURE);
 370         }
 371     }
 372
 373   if (fp == stdin)
 374     have_read_stdin = 1;
 375   return fp;
 376 }
 377
 378 static void
 379 xfclose (FILE *fp)
 380 {
 381   if (fp == stdin)
 382     {
 383       /* Allow reading stdin from tty more than once. */
 384       if (feof (fp))
 385         clearerr (fp);
 386     }
 387   else if (fp == stdout)
 388     {
 389       if (fflush (fp) != 0)
 390         {
 391           error (0, errno, _("flushing file"));
 392           cleanup ();
 393           exit (SORT_FAILURE);
 394         }
 395     }
 396   else
 397     {
 398       if (fclose (fp) != 0)
 399         {
 400           error (0, errno, _("error closing file"));
 401           cleanup ();
 402           exit (SORT_FAILURE);
 403         }
 404     }
 405 }
 406
 407 static void
 408 write_bytes (const char *buf, size_t n_bytes, FILE *fp)
 409 {
 410   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
 411     {
 412       error (0, errno, _("write error"));
 413       cleanup ();
 414       exit (SORT_FAILURE);
 415     }
 416 }
 417
 418 /* Return a name for a temporary file. */
 419
 420 static char *
 421 tempname (void)
 422 {
 423   static unsigned int seq;
 424   int len = strlen (temp_file_prefix);
 425   char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
 426   struct tempnode *node;
 427
 428   node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
 429   sprintf (name,
 430            "%s%ssort%5.5d%5.5d",
 431            temp_file_prefix,
 432            (len && temp_file_prefix[len - 1] != '/') ? "/" : "",
 433            (unsigned int) getpid () & 0xffff, seq);
 434
 435   /* Make sure that SEQ's value fits in 5 digits.  */
 436   ++seq;
 437   if (seq >= 100000)
 438     seq = 0;
 439
 440   node->name = name;
 441   node->next = temphead.next;
 442   temphead.next = node;
 443   return name;
 444 }
 445
 446 /* Search through the list of temporary files for NAME;
 447    remove it if it is found on the list. */
 448
 449 static void
 450 zaptemp (const char *name)
 451 {
 452   struct tempnode *node, *temp;
 453
 454   for (node = &temphead; node->next; node = node->next)
 455     if (STREQ (name, node->next->name))
 456       break;
 457   if (node->next)
 458     {
 459       temp = node->next;
 460       unlink (temp->name);
 461       free (temp->name);
 462       node->next = temp->next;
 463       free ((char *) temp);
 464     }
 465 }
 466
 467 #ifdef ENABLE_NLS
 468 /* Initialize the character class tables. */
 469
 470 static int
 471 nls_sort_month_comp (const void *m1, const void *m2)
 472 {
 473   return strcoll (((const struct month *) m1)->name,
 474                   ((const struct month *) m2)->name);
 475 }
 476
 477 /* Do collation on strings S1 and S2, but for at most L characters.
 478    we use the fact, that we KNOW that LEN is the min of the two lengths */
 479 static int
 480 strncoll (char *s1, char *s2, int len)
 481 {
 482   register int diff;
 483
 484   if (need_locale)
 485     {
 486       /* Emulate a strncoll function, by forcing strcoll to compare
 487          only the first LEN characters in each string. */
 488       register unsigned char n1 = s1[len];
 489       register unsigned char n2 = s2[len];
 490
 491       s1[len] = s2[len] = 0;
 492       diff = strcoll (s1, s2);
 493       s1[len] = n1;
 494       s2[len] = n2;
 495     }
 496   else
 497     {
 498       diff = memcmp (s1, s2, len);
 499     }
 500
 501   return diff;
 502 }
 503
 504 /* Do collation on strings S1 and S2, but for at most L characters.
 505    Use the fact, that we KNOW that S2 is the shorter string and has
 506    length LEN.  */
 507 static int
 508 strncoll_s2_readonly (char *s1, const char *s2, int len)
 509 {
 510   register int diff;
 511
 512   assert (len == strlen (s2));
 513   assert (len <= strlen (s1));
 514
 515   if (need_locale)
 516     {
 517       /* Emulate a strncoll function, by forcing strcoll to compare
 518          only the first LEN characters in each string. */
 519       register unsigned char n1 = s1[len];
 520
 521       s1[len] = 0;
 522       diff = strcoll (s1, s2);
 523       s1[len] = n1;
 524     }
 525   else
 526     {
 527       diff = memcmp (s1, s2, len);
 528     }
 529
 530   return diff;
 531 }
 532
 533 #endif /* NLS */
 534
 535 static void
 536 inittables (void)
 537 {
 538   int i;
 539
 540   for (i = 0; i < UCHAR_LIM; ++i)
 541     {
 542       if (ISBLANK (i))
 543         blanks[i] = 1;
 544       if (!ISPRINT (i))
 545         nonprinting[i] = 1;
 546       if (!ISALNUM (i) && !ISBLANK (i))
 547         nondictionary[i] = 1;
 548       if (ISLOWER (i))
 549         fold_toupper[i] = toupper (i);
 550       else
 551         fold_toupper[i] = i;
 552     }
 553
 554 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
 555   /* If We're not in the "C" locale, read in different names for months. */
 556   if (need_locale)
 557     {
 558       nls_months_collide[0] = 1;        /* if an error, look again       */
 559       for (i = 0; i < NLS_NUM_MONTHS; i++)
 560         {
 561           char *s;
 562           size_t s_len;
 563           int j;
 564
 565           s = (char *) nl_langinfo (ABMON_1 + us_monthtab[i].val - 1);
 566           s_len = strlen (s);
 567           nls_monthtab[i].name = (char *) xmalloc (s_len + 1);
 568           nls_monthtab[i].val = us_monthtab[i].val;
 569
 570           /* Be careful: abreviated month names
 571              may be longer than the usual 3 characters.  */
 572           for (j = 0; j < s_len; j++)
 573             nls_monthtab[i].name[j] = fold_toupper[UCHAR (s[j])];
 574           nls_monthtab[i].name[j] = '\0';
 575
 576           nls_months_collide[nls_monthtab[i].val] = 0;
 577           for (j = 0; j < NLS_NUM_MONTHS; ++j)
 578             {
 579               if (STREQ (nls_monthtab[i].name, us_monthtab[i].name))
 580                 {
 581                   /* There are indeed some month names in English which
 582                      collide with the NLS name.  */
 583                   nls_months_collide[nls_monthtab[i].val] = 1;
 584                   break;
 585                 }
 586             }
 587         }
 588       /* Now quicksort the month table (should be sorted already!).
 589          However, another locale doesn't rule out the possibility
 590          of a different order of month names. */
 591       qsort ((void *) nls_monthtab, NLS_NUM_MONTHS,
 592              sizeof (struct month), nls_sort_month_comp);
 593       monthtab = nls_monthtab;
 594     }
 595 #endif /* NLS */
 596 }
 597
 598 /* Initialize BUF, allocating ALLOC bytes initially. */
 599
 600 static void
 601 initbuf (struct buffer *buf, int alloc)
 602 {
 603   buf->alloc = alloc;
 604   buf->buf = xmalloc (buf->alloc);
 605   buf->used = buf->left = 0;
 606 }
 607
 608 /* Fill BUF reading from FP, moving buf->left bytes from the end
 609    of buf->buf to the beginning first.  If EOF is reached and the
 610    file wasn't terminated by a newline, supply one.  Return a count
 611    of bytes buffered. */
 612
 613 static int
 614 fillbuf (struct buffer *buf, FILE *fp)
 615 {
 616   int cc;
 617
 618   memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
 619   buf->used = buf->left;
 620
 621   while (!feof (fp) && (buf->used == 0
 622                         || !memchr (buf->buf, eolchar, buf->used)))
 623     {
 624       if (buf->used == buf->alloc)
 625         {
 626           buf->alloc *= 2;
 627           buf->buf = xrealloc (buf->buf, buf->alloc);
 628         }
 629       cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
 630       if (ferror (fp))
 631         {
 632           error (0, errno, _("read error"));
 633           cleanup ();
 634           exit (SORT_FAILURE);
 635         }
 636       buf->used += cc;
 637     }
 638
 639   if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
 640     {
 641       if (buf->used == buf->alloc)
 642         {
 643           buf->alloc *= 2;
 644           buf->buf = xrealloc (buf->buf, buf->alloc);
 645         }
 646       buf->buf[buf->used++] = eolchar;
 647     }
 648
 649   return buf->used;
 650 }
 651
 652 /* Initialize LINES, allocating space for ALLOC lines initially.
 653    LIMIT is the maximum possible number of lines to allocate space
 654    for, ever.  */
 655
 656 static void
 657 initlines (struct lines *lines, int alloc, int limit)
 658 {
 659   lines->alloc = alloc;
 660   lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
 661   lines->used = 0;
 662   lines->limit = limit;
 663 }
 664
 665 /* Return a pointer to the first character of the field specified
 666    by KEY in LINE. */
 667
 668 static char *
 669 begfield (const struct line *line, const struct keyfield *key)
 670 {
 671   register char *ptr = line->text, *lim = ptr + line->length;
 672   register int sword = key->sword, schar = key->schar;
 673
 674   if (tab)
 675     while (ptr < lim && sword--)
 676       {
 677         while (ptr < lim && *ptr != tab)
 678           ++ptr;
 679         if (ptr < lim)
 680           ++ptr;
 681       }
 682   else
 683     while (ptr < lim && sword--)
 684       {
 685         while (ptr < lim && blanks[UCHAR (*ptr)])
 686           ++ptr;
 687         while (ptr < lim && !blanks[UCHAR (*ptr)])
 688           ++ptr;
 689       }
 690
 691   if (key->skipsblanks)
 692     while (ptr < lim && blanks[UCHAR (*ptr)])
 693       ++ptr;
 694
 695   if (ptr + schar <= lim)
 696     ptr += schar;
 697   else
 698     ptr = lim;
 699
 700   return ptr;
 701 }
 702
 703 /* Return the limit of (a pointer to the first character after) the field
 704    in LINE specified by KEY. */
 705
 706 static char *
 707 limfield (const struct line *line, const struct keyfield *key)
 708 {
 709   register char *ptr = line->text, *lim = ptr + line->length;
 710   register int eword = key->eword, echar = key->echar;
 711
 712   /* Note: from the POSIX spec:
 713      The leading field separator itself is included in
 714      a field when -t is not used.  FIXME: move this comment up... */
 715
 716   /* Move PTR past EWORD fields or to one past the last byte on LINE,
 717      whichever comes first.  If there are more than EWORD fields, leave
 718      PTR pointing at the beginning of the field having zero-based index,
 719      EWORD.  If a delimiter character was specified (via -t), then that
 720      `beginning' is the first character following the delimiting TAB.
 721      Otherwise, leave PTR pointing at the first `blank' character after
 722      the preceding field.  */
 723   if (tab)
 724     while (ptr < lim && eword--)
 725       {
 726         while (ptr < lim && *ptr != tab)
 727           ++ptr;
 728         if (ptr < lim && (eword || echar > 0))
 729           ++ptr;
 730       }
 731   else
 732     while (ptr < lim && eword--)
 733       {
 734         while (ptr < lim && blanks[UCHAR (*ptr)])
 735           ++ptr;
 736         while (ptr < lim && !blanks[UCHAR (*ptr)])
 737           ++ptr;
 738       }
 739
 740 #ifdef POSIX_UNSPECIFIED
 741   /* The following block of code makes GNU sort incompatible with
 742      standard Unix sort, so it's ifdef'd out for now.
 743      The POSIX spec isn't clear on how to interpret this.
 744      FIXME: request clarification.
 745
 746      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
 747      Date: Thu, 30 May 96 12:20:41 -0400
 748
 749      [...]I believe I've found another bug in `sort'.
 750
 751      $ cat /tmp/sort.in
 752      a b c 2 d
 753      pq rs 1 t
 754      $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
 755      a b c 2 d
 756      pq rs 1 t
 757      $ /bin/sort +0.6 -0.7 </tmp/sort.in
 758      pq rs 1 t
 759      a b c 2 d
 760
 761      Unix sort produced the answer I expected: sort on the single character
 762      in column 6.  GNU sort produced different results, because it disagrees
 763      on the interpretation of the key-end spec "-M.N".  Unix sort reads this
 764      as "skip M fields, then N characters"; but GNU sort wants it to mean
 765      "skip M fields, then either N characters or the rest of the current
 766      field, whichever comes first".  This extra clause applies only to
 767      key-ends, not key-starts.
 768      */
 769
 770   /* Make LIM point to the end of (one byte past) the current field.  */
 771   if (tab)
 772     {
 773       char *newlim;
 774       newlim = memchr (ptr, tab, lim - ptr);
 775       if (newlim)
 776         lim = newlim;
 777     }
 778   else
 779     {
 780       char *newlim;
 781       newlim = ptr;
 782       while (newlim < lim && blanks[UCHAR (*newlim)])
 783         ++newlim;
 784       while (newlim < lim && !blanks[UCHAR (*newlim)])
 785         ++newlim;
 786       lim = newlim;
 787     }
 788 #endif
 789
 790   /* If we're skipping leading blanks, don't start counting characters
 791      until after skipping past any leading blanks.  */
 792   if (key->skipsblanks)
 793     while (ptr < lim && blanks[UCHAR (*ptr)])
 794       ++ptr;
 795
 796   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
 797   if (ptr + echar <= lim)
 798     ptr += echar;
 799   else
 800     ptr = lim;
 801
 802   return ptr;
 803 }
 804
 805 /* FIXME */
 806
 807 void
 808 trim_trailing_blanks (const char *a_start, char **a_end)
 809 {
 810   while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
 811     --(*a_end);
 812 }
 813
 814 /* Find the lines in BUF, storing pointers and lengths in LINES.
 815    Also replace newlines in BUF with NULs. */
 816
 817 static void
 818 findlines (struct buffer *buf, struct lines *lines)
 819 {
 820   register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
 821   struct keyfield *key = keyhead.next;
 822
 823   lines->used = 0;
 824
 825   while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))
 826          && lines->used < lines->limit)
 827     {
 828       /* There are various places in the code that rely on a NUL
 829          being at the end of in-core lines; NULs inside the lines
 830          will not cause trouble, though. */
 831       *ptr = '\0';
 832
 833       if (lines->used == lines->alloc)
 834         {
 835           lines->alloc *= 2;
 836           lines->lines = (struct line *)
 837             xrealloc ((char *) lines->lines,
 838                       lines->alloc * sizeof (struct line));
 839         }
 840
 841       lines->lines[lines->used].text = beg;
 842       lines->lines[lines->used].length = ptr - beg;
 843
 844       /* Precompute the position of the first key for efficiency. */
 845       if (key)
 846         {
 847           if (key->eword >= 0)
 848             lines->lines[lines->used].keylim =
 849               limfield (&lines->lines[lines->used], key);
 850           else
 851             lines->lines[lines->used].keylim = ptr;
 852
 853           if (key->sword >= 0)
 854             lines->lines[lines->used].keybeg =
 855               begfield (&lines->lines[lines->used], key);
 856           else
 857             {
 858               if (key->skipsblanks)
 859                 while (blanks[UCHAR (*beg)])
 860                   ++beg;
 861               lines->lines[lines->used].keybeg = beg;
 862             }
 863           if (key->skipeblanks)
 864             {
 865               trim_trailing_blanks (lines->lines[lines->used].keybeg,
 866                                     &lines->lines[lines->used].keylim);
 867             }
 868         }
 869       else
 870         {
 871           lines->lines[lines->used].keybeg = 0;
 872           lines->lines[lines->used].keylim = 0;
 873         }
 874
 875       ++lines->used;
 876       beg = ptr + 1;
 877     }
 878
 879   buf->left = lim - beg;
 880 }
 881
 882 /* Compare strings A and B containing decimal fractions < 1.  Each string
 883    should begin with a decimal point followed immediately by the digits
 884    of the fraction.  Strings not of this form are considered to be zero. */
 885
 886 /* The goal here, is to take two numbers a and b... compare these
 887    in parallel.  Instead of converting each, and then comparing the
 888    outcome.  Most likely stopping the comparison before the conversion
 889    is complete.  The algorithm used, in the old sort:
 890
 891    Algorithm: fraccompare
 892    Action   : compare two decimal fractions
 893    accepts  : char *a, char *b
 894    returns  : -1 if a<b, 0 if a=b, 1 if a>b.
 895    implement:
 896
 897    if *a == decimal_point AND *b == decimal_point
 898      find first character different in a and b.
 899      if both are digits, return the difference *a - *b.
 900      if *a is a digit
 901        skip past zeroes
 902        if digit return 1, else 0
 903      if *b is a digit
 904        skip past zeroes
 905        if digit return -1, else 0
 906    if *a is a decimal_point
 907      skip past decimal_point and zeroes
 908      if digit return 1, else 0
 909    if *b is a decimal_point
 910      skip past decimal_point and zeroes
 911      if digit return -1, else 0
 912    return 0
 913
 914    The above implementation duplicates code, and thus there is room
 915    for improvement:
 916       the difference in code of a and b, is solved by using a
 917       reference to s, assigned to either a or b. and using diff
 918       to denote return value.
 919       the difference in either that start being a digit or
 920       the decimal point, is solved by testing if either is
 921       a decimal point, or if the other is a digit...
 922
 923    if *a or *b is a decimal_point
 924       skip all chars where *a == *b
 925       if *a and *b are digits
 926         return *a - *b
 927       if *a is a digit or *a is a decimal_point
 928         s is a
 929         diff is 1
 930       else
 931         s is b
 932         diff is -1
 933       skip decimal_point in s
 934       skip zeroes in s
 935       if *s is a digit
 936         return diff
 937     return 0 */
 938
 939 #define USE_NEW_FRAC_COMPARE
 940 #ifdef USE_NEW_FRAC_COMPARE
 941
 942 static int
 943 fraccompare (register const char *a, register const char *b)
 944 {
 945 # ifdef ENABLE_NLS
 946   nls_fraction_found = 1;
 947 # endif
 948
 949   if (*a == decimal_point || *b == decimal_point)
 950     {
 951       register const char *s;
 952       int diff;
 953
 954       while (*a == *b)
 955         {
 956           ++a;
 957           ++b;
 958           if (!ISDIGIT (*a))
 959             break;
 960         }
 961
 962       if (ISDIGIT (*a) && ISDIGIT (*b))
 963         return (*a) - (*b);
 964
 965       if (*a == decimal_point || (ISDIGIT (*a) && *b != decimal_point))
 966         {
 967           s = a;
 968           diff = 1;
 969         }
 970       else
 971         {
 972           s = b;
 973           diff = -1;
 974         }
 975       if (*s == decimal_point)
 976         ++s;
 977       while (*s == NUMERIC_ZERO)
 978         ++s;
 979       if (ISDIGIT (*s))
 980         return diff;
 981     }
 982   return 0;
 983 }
 984
 985 #else
 986 static int
 987 fraccompare (register const char *a, register const char *b)
 988 {
 989   register int tmpa = *a;
 990   register int tmpb = *b;
 991
 992   if (tmpa == decimal_point && tmpb == decimal_point)
 993     {
 994       do
 995         tmpa = *++a, tmpb = *++b;
 996       while (tmpa == tmpb && ISDIGIT (tmpa));
 997       if (ISDIGIT (tmpa) && ISDIGIT (tmpb))
 998         return tmpa - tmpb;
 999       if (ISDIGIT (tmpa))
1000         {
1001           while (tmpa == NUMERIC_ZERO)
1002             tmpa = *++a;
1003           if (ISDIGIT (tmpa))
1004             return 1;
1005           return 0;
1006         }
1007       if (ISDIGIT (tmpb))
1008         {
1009           while (tmpb == NUMERIC_ZERO)
1010             tmpb = *++b;
1011           if (ISDIGIT (tmpb))
1012             return -1;
1013           return 0;
1014         }
1015       return 0;
1016     }
1017   else if (tmpa == decimal_point)
1018     {
1019       do
1020         tmpa = *++a;
1021       while (tmpa == NUMERIC_ZERO);
1022       if (ISDIGIT (tmpa))
1023         return 1;
1024       return 0;
1025     }
1026   else if (tmpb == decimal_point)
1027     {
1028       do
1029         tmpb = *++b;
1030       while (tmpb == NUMERIC_ZERO);
1031       if (ISDIGIT (tmpb))
1032         return -1;
1033       return 0;
1034     }
1035   return 0;
1036 }
1037 #endif
1038
1039 /* Compare strings A and B as numbers without explicitly converting them to
1040    machine numbers.  Comparatively slow for short strings, but asymptotically
1041    hideously fast. */
1042
1043 /* The code here, is like the above... continuous reoccurrance of the
1044    same code... improved 15-JAN-1997 in connection with native languages
1045    support */
1046
1047 #ifdef ENABLE_NLS
1048
1049 /* Decide the kind of fraction the program will use */
1050 static void
1051 nls_set_fraction (register unsigned char ch)
1052 {
1053   if (!nls_fraction_found && ch != decimal_point)
1054     {
1055       if (ch == FLOATING_POINT)
1056         {                               /* US style */
1057           decimal_point = FLOATING_POINT;
1058           th_sep = FLOATING_COMMA;
1059         }
1060       else if (ch == FLOATING_COMMA)
1061         {                               /* EU style */
1062           decimal_point = FLOATING_COMMA;
1063           th_sep = FLOATING_POINT;
1064         }
1065       else if (ch != decimal_point)
1066         {                               /* Alien    */
1067           decimal_point = ch;
1068           th_sep = '\0';
1069         }
1070     }
1071   nls_fraction_found = 1;
1072 }
1073
1074 /* Look for a fraction
1075    It isn't as simple as it looks... however, consider a number:
1076       1.234,00
1077       1,234.00
1078    It's easy to tell which is a decimal point, and which isn't.  We use
1079    the grouping information to find out how many digits are grouped together
1080    for thousand separator.
1081
1082    The idea here, is to use the grouping information... but not to
1083    spend time with verifying the groups... not too much time, anyway.
1084    so, a number represented to us as:
1085       1.234.567,89
1086    will be taken and separated into different groups, separated by a
1087    separator character (Decimal point or thousands separator).
1088       {1,234,567}
1089    these are the groups of digits that lead to a separator character,
1090    and with the trailing group is added:
1091       {1,234,567,89}
1092    resulting in 4 groups of numbers.  If the resulting number of groups,
1093    are none, or just 1... this is not enough to decide anything about
1094    the decimal point.  We need at least two for that.  With two groups
1095    we have at least one separator.  That separator can be a decimal
1096    point, or a thousands separator... if it is a thousands separator
1097    the number of digits in the last group, will comply with the first
1098    rule in the grouping rule for numeric values. i.e.
1099       |{89}| = grouping[0]
1100    if so, and there are only two groups of numbers, the value cannot
1101    be determined.  If there are three or more numbers, the separator
1102    separating the groups is checked.  If these are the same, the
1103    character is determined to be a thousands separator.  If they are
1104    not the same, the last separator is determined to be a decimal
1105    point.  If checking the grouping rules, we find out that there
1106    are no grouping rules defined, either the grouping rules is NULL
1107    or the first grouping number is 0, then the locale format is used.
1108
1109    We try to take an advantage of a special situation.  If the trailing
1110    group, the one that normally should be the fractional part, turns
1111    out to have the same length as the thousands separator rule says,
1112    making a doubt on that it may be a decimal point, we look for the
1113    group before that, i.e. with a two group form:
1114      {1234,567}
1115    where the grouping rule is 3;3... we take a look at group 1, and find
1116    out that |{1234}| > larger of the two first grouping rules, then
1117    the separator has to be a decimal point...
1118    */
1119
1120 static void
1121 look_for_fraction (const char *s, const char *e)
1122 {
1123   register const char *p;
1124   register unsigned short n = 0;
1125   static unsigned short max_groups = 0;
1126   static unsigned short *groups = NULL;
1127
1128   if (groups == NULL)
1129     {
1130       max_groups = NLS_MAX_GROUPS;
1131       groups = (unsigned short *) xmalloc (sizeof (*groups) * max_groups);
1132     }
1133
1134   /* skip blanks and signs */
1135   while (blanks[UCHAR (*s)] || *s == NEGATION_SIGN)
1136     s++;
1137   /* groups = {}, n = 0 */
1138   for (p = s; p < e; p++)
1139     {
1140       /* groups[n]={number of digits leading to separator n}
1141          n = number of separators so far */
1142       if (*p == decimal_point || *p == th_sep || *p == FLOATING_POINT)
1143         {
1144           if (++n >= max_groups)
1145             {
1146               /* BIG Number... enlarge table */
1147               max_groups += NLS_MAX_GROUPS;
1148               groups = (unsigned short *) xrealloc ((char *) groups,
1149                                                     (sizeof (*groups)
1150                                                      * max_groups));
1151             }
1152           groups[n] = (unsigned short) (p - s);
1153           s = p + 1;
1154         }
1155       else if (!ISDIGIT (*p))
1156         break;
1157       /* mem[s..p]=digits only */
1158     }
1159   /* n = number of separators in s..e */
1160   groups[++n] = (short) (p - s);
1161   /* n = groups in the number */
1162   if (n <= 1)
1163     return;                     /* Only one group of numbers... not enough */
1164   p = nls_grouping;
1165   /* p = address of group rules
1166      s = address of next character after separator */
1167   s = s - 1;                    /* s = address of last separator */
1168   if (p && *p)
1169     {
1170       /* a legal trailing group, iff groups[n] == first rule */
1171       if (groups[n] != (short) *p)
1172         nls_set_fraction (*s);
1173       else
1174         {
1175           if (n == 2)
1176             {                   /* Only two groups */
1177               if (groups[n - 1] > max (p[0], p[1]))
1178                 nls_set_fraction (*s);
1179               return;
1180             }
1181           /* if the separators are the same, it's a thousands */
1182           if (*s != *(s - groups[n]))
1183             nls_set_fraction (*s);
1184           /* s[0] = thousands separator */
1185           else if (*s == th_sep)
1186             nls_fraction_found = 1;
1187         }
1188     }
1189   else
1190     {
1191       /* no grouping allowed here, last separator IS decimal point */
1192       nls_set_fraction (*s);
1193     }
1194 }
1195
1196 static int
1197 numcompare (register const char *a, register const char *b)
1198 {
1199   int ret_code = 1;             /* normal return status, see later in code */
1200   int diff = 0;                 /* difference between two digits           */
1201
1202   while (blanks[UCHAR (*a)])
1203     ++a;
1204   while (blanks[UCHAR (*b)])
1205     ++b;
1206
1207   /* next character in a,b is non-blank */
1208   if ((*a == NEGATION_SIGN || *b == NEGATION_SIGN) && *a != *b)
1209     {
1210       /* a < 0, or b < 0, but not both */
1211       if (*a == NEGATION_SIGN)
1212         ret_code = -1, ++a;     /* a looks < b */
1213       else if (*b == NEGATION_SIGN)
1214         ret_code = 1, ++b;      /* b looks < a */
1215       /* bypass zeroes, decimal points, and thousand sep in a & b */
1216       while (*a == NUMERIC_ZERO || (th_sep && *a == th_sep)
1217              || *a == decimal_point)
1218         ++a;
1219
1220       while (*b == NUMERIC_ZERO || (th_sep && *b == th_sep)
1221              || *b == decimal_point)
1222         ++b;
1223
1224       if (ISDIGIT (*a) || ISDIGIT (*b))
1225         /* here, either a or b or both are digits
1226            if a and b are digits, the signed one is the lesser.
1227            if a is a digit, and not b.. it means b==0, and if b==0
1228            than either is signed if b is signed then -0 < a
1229            or if a is signed then -a < 0.  The ret_code is already set
1230            to mark that the signed number is the lesser, so we just
1231            return that number here.                                    */
1232         return ret_code;
1233
1234       /* *a and *b are neither digits, they are equal -0 == +0 */
1235       return 0;
1236     }
1237   else
1238     {
1239       /* either both numbers are signed, or both are not-signed */
1240       if (*a == NEGATION_SIGN)
1241         {
1242           ++a;
1243           ++b;
1244           ret_code = -1;
1245         }
1246       /* if both are signed, then remember -100 < -10 (ret_code reversed!) */
1247
1248       /* Skip any leading zeroes */
1249       while (*a == NUMERIC_ZERO)
1250         ++a;
1251       while (*b == NUMERIC_ZERO)
1252         ++b;
1253
1254     continue_thousands:
1255
1256       /* skip all equal digits */
1257       while (ISDIGIT (*a) && ISDIGIT (*b) && *a == *b)
1258         a++, b++;
1259
1260       /* Here, we have either different digits, or possible fractions
1261          or thousand separators. */
1262
1263       if (ISDIGIT (*a) && ISDIGIT (*b))
1264         {
1265           if (diff == 0)
1266             diff = ((*a) - (*b));       /* simple, isn't it? not quite */
1267           a++, b++;
1268           goto continue_thousands;
1269         }
1270
1271       /* now, here either may be a fraction, or a thousand separator...
1272          or both.                                                        */
1273       /* We've decided what are decimal_points, and what are thousands sep */
1274       if ((th_sep != 0) && (*a == th_sep || *b == th_sep))
1275         {
1276           if (*a == th_sep)
1277             ++a;
1278           if (*b == th_sep)
1279             ++b;
1280           goto continue_thousands;      /* Ugly, but better than a while(1) */
1281         }
1282
1283       if (ISDIGIT (*a))
1284         return ret_code;        /* a has more digits than b */
1285       if (ISDIGIT (*b))
1286         return ret_code * -1;   /* b has more digits than a */
1287
1288       /* now, we should have the fractions solved */
1289       if ((diff == 0) && (*a == decimal_point || *b == decimal_point))
1290         return ret_code * fraccompare (a, b);
1291
1292       return diff;              /* fall through here, and diff decides */
1293     }
1294 }
1295 #else
1296 static int
1297 numcompare (register const char *a, register const char *b)
1298 {
1299   register int tmpa, tmpb, loga, logb, tmp;
1300
1301   tmpa = UCHAR (*a);
1302   tmpb = UCHAR (*b);
1303
1304   while (blanks[tmpa])
1305     tmpa = UCHAR (*++a);
1306   while (blanks[tmpb])
1307     tmpb = UCHAR (*++b);
1308
1309   if (tmpa == NEGATION_SIGN)
1310     {
1311       do
1312         tmpa = UCHAR (*++a);
1313       while (tmpa == NUMERIC_ZERO);
1314       if (tmpb != NEGATION_SIGN)
1315         {
1316           if (tmpa == decimal_point)
1317             do
1318               tmpa = *++a;
1319             while (tmpa == NUMERIC_ZERO);
1320           if (ISDIGIT (tmpa))
1321             return -1;
1322           while (tmpb == NUMERIC_ZERO)
1323             tmpb = UCHAR (*++b);
1324           if (tmpb == decimal_point)
1325             do
1326               tmpb = *++b;
1327             while (tmpb == NUMERIC_ZERO);
1328           if (ISDIGIT (tmpb))
1329             return -1;
1330           return 0;
1331         }
1332       do
1333         tmpb = UCHAR (*++b);
1334       while (tmpb == NUMERIC_ZERO);
1335
1336       while (tmpa == tmpb && ISDIGIT (tmpa))
1337         tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
1338
1339       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1340           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1341         return -fraccompare (a, b);
1342
1343       if (ISDIGIT (tmpa))
1344         for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga)
1345           ;
1346       else
1347         loga = 0;
1348
1349       if (ISDIGIT (tmpb))
1350         for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb)
1351           ;
1352       else
1353         logb = 0;
1354
1355       if ((tmp = logb - loga) != 0)
1356         return tmp;
1357
1358       if (!loga)
1359         return 0;
1360
1361       return tmpb - tmpa;
1362     }
1363   else if (tmpb == NEGATION_SIGN)
1364     {
1365       do
1366         tmpb = UCHAR (*++b);
1367       while (tmpb == NUMERIC_ZERO);
1368       if (tmpb == decimal_point)
1369         do
1370           tmpb = *++b;
1371         while (tmpb == NUMERIC_ZERO);
1372       if (ISDIGIT (tmpb))
1373         return 1;
1374       while (tmpa == NUMERIC_ZERO)
1375         tmpa = UCHAR (*++a);
1376       if (tmpa == decimal_point)
1377         do
1378           tmpa = UCHAR (*++a);
1379         while (tmpa == NUMERIC_ZERO);
1380       if (ISDIGIT (tmpa))
1381         return 1;
1382       return 0;
1383     }
1384   else
1385     {
1386       while (tmpa == NUMERIC_ZERO)
1387         tmpa = UCHAR (*++a);
1388       while (tmpb == NUMERIC_ZERO)
1389         tmpb = UCHAR (*++b);
1390
1391       while (tmpa == tmpb && ISDIGIT (tmpa))
1392         tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
1393
1394       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1395           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1396         return fraccompare (a, b);
1397
1398       if (ISDIGIT (tmpa))
1399         for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga)
1400           ;
1401       else
1402         loga = 0;
1403
1404       if (ISDIGIT (tmpb))
1405         for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb)
1406           ;
1407       else
1408         logb = 0;
1409
1410       if ((tmp = loga - logb) != 0)
1411         return tmp;
1412
1413       if (!loga)
1414         return 0;
1415
1416       return tmpa - tmpb;
1417     }
1418 }
1419 #endif
1420
1421 static int
1422 general_numcompare (const char *sa, const char *sb)
1423 {
1424   double a, b;
1425   /* FIXME: add option to warn about failed conversions.  */
1426   /* FIXME: maybe add option to try expensive FP conversion
1427      only if A and B can't be compared more cheaply/accurately.  */
1428   if (xstrtod (sa, NULL, &a))
1429     {
1430       a = 0;
1431     }
1432   if (xstrtod (sb, NULL, &b))
1433     {
1434       b = 0;
1435     }
1436   return a == b ? 0 : a < b ? -1 : 1;
1437 }
1438
1439 /* Return an integer in 1..12 of the month name S with length LEN.
1440    Return 0 if the name in S is not recognized.  */
1441
1442 static int
1443 getmonth (const char *s, int len)
1444 {
1445   char *month;
1446   register int i, lo = 0, hi = 12, result;
1447
1448   while (len > 0 && blanks[UCHAR (*s)])
1449     {
1450       ++s;
1451       --len;
1452     }
1453
1454   if (len == 0)
1455     return 0;
1456
1457   month = (char *) alloca (len + 1);
1458   for (i = 0; i < len; ++i)
1459     month[i] = fold_toupper[UCHAR (s[i])];
1460   while (blanks[UCHAR (month[i - 1])])
1461     --i;
1462   month[i] = '\0';
1463
1464   do
1465     {
1466       int ix = (lo + hi) / 2;
1467
1468       len = strlen (monthtab[ix].name);
1469       if (NLS_STRNCMP (month, monthtab[ix].name, len) < 0)
1470         hi = ix;
1471       else
1472         lo = ix;
1473     }
1474   while (hi - lo > 1);
1475
1476   result = (!strncmp (month, monthtab[lo].name, len) ? monthtab[lo].val : 0);
1477
1478   return result;
1479 }
1480
1481 #ifdef ENABLE_NLS
1482 /* Look for the month in locale table, and if that fails try with
1483    us month name table                                              */
1484 static int
1485 nls_month_is_either_locale (const char *s, int len)
1486 {
1487   int ind;
1488
1489   monthtab = nls_monthtab;
1490   ind = getmonth (s, len);
1491   if (ind == 0)
1492     {
1493       monthtab = us_monthtab;
1494       ind = getmonth (s, len);
1495     }
1496   return ind;
1497 }
1498 #endif
1499
1500 /* Compare two lines A and B trying every key in sequence until there
1501    are no more keys or a difference is found. */
1502
1503 static int
1504 keycompare (const struct line *a, const struct line *b)
1505 {
1506   register char *texta, *textb, *lima, *limb;
1507   register unsigned char *translate;
1508   register int *ignore;
1509   struct keyfield *key;
1510   int diff = 0, iter = 0, lena, lenb;
1511
1512   for (key = keyhead.next; key; key = key->next, ++iter)
1513     {
1514       int comparable_lengths = 1;
1515
1516       ignore = key->ignore;
1517       translate = (unsigned char *) key->translate;
1518
1519       /* Find the beginning and limit of each field. */
1520       if (iter || a->keybeg == NULL || b->keybeg == NULL)
1521         {
1522           if (key->eword >= 0)
1523             lima = limfield (a, key), limb = limfield (b, key);
1524           else
1525             lima = a->text + a->length, limb = b->text + b->length;
1526
1527           if (key->sword >= 0)
1528             texta = begfield (a, key), textb = begfield (b, key);
1529           else
1530             {
1531               texta = a->text, textb = b->text;
1532               if (key->skipsblanks)
1533                 {
1534                   while (texta < lima && blanks[UCHAR (*texta)])
1535                     ++texta;
1536                   while (textb < limb && blanks[UCHAR (*textb)])
1537                     ++textb;
1538                 }
1539             }
1540         }
1541       else
1542         {
1543           /* For the first iteration only, the key positions have
1544              been precomputed for us. */
1545           texta = a->keybeg, lima = a->keylim;
1546           textb = b->keybeg, limb = b->keylim;
1547         }
1548
1549       /* Find the lengths. */
1550       lena = lima - texta, lenb = limb - textb;
1551       if (lena < 0)
1552         lena = 0;
1553       if (lenb < 0)
1554         lenb = 0;
1555
1556       if (key->skipeblanks)
1557         {
1558           char *a_end = texta + lena;
1559           char *b_end = textb + lenb;
1560           trim_trailing_blanks (texta, &a_end);
1561           trim_trailing_blanks (textb, &b_end);
1562           lena = a_end - texta;
1563           lenb = b_end - textb;
1564         }
1565
1566       /* Actually compare the fields. */
1567       if (key->numeric)
1568         {
1569           if (*lima || *limb)
1570             {
1571               char savea = *lima, saveb = *limb;
1572
1573               *lima = *limb = '\0';
1574               diff = numcompare (texta, textb);
1575               *lima = savea, *limb = saveb;
1576             }
1577           else
1578             diff = numcompare (texta, textb);
1579
1580           if (diff)
1581             return key->reverse ? -diff : diff;
1582           continue;
1583         }
1584       else if (key->general_numeric)
1585         {
1586           if (*lima || *limb)
1587             {
1588               char savea = *lima, saveb = *limb;
1589
1590               *lima = *limb = '\0';
1591               diff = general_numcompare (texta, textb);
1592               *lima = savea, *limb = saveb;
1593             }
1594           else
1595             diff = general_numcompare (texta, textb);
1596
1597           if (diff)
1598             return key->reverse ? -diff : diff;
1599           continue;
1600         }
1601       else if (key->month)
1602         {
1603 #ifdef ENABLE_NLS
1604
1605           /* if we haven't decided which locale to go with, we get the
1606              month name from either.  If either month name is fully
1607              solved and the month name doesn't collide with the other
1608              locale... then use that table from there forward */
1609           if (!nls_month_found)
1610             {
1611               int x;
1612
1613               x = nls_month_is_either_locale (texta, lena);
1614               nls_month_found = !nls_months_collide[x];
1615               if (nls_month_found)
1616                 {
1617                   diff = x - getmonth (textb, lenb);
1618                 }
1619               else
1620                 {
1621                   diff = nls_month_is_either_locale (textb, lenb);
1622                   nls_month_found = !nls_months_collide[diff];
1623                   diff = x - diff;
1624                 }
1625             }
1626           else
1627 #endif
1628             diff = getmonth (texta, lena) - getmonth (textb, lenb);
1629           if (diff)
1630             return key->reverse ? -diff : diff;
1631           continue;
1632         }
1633 #ifdef ENABLE_NLS
1634
1635       /* Sorting like this may become slow, so in a simple locale the user
1636          can select a faster sort that is similar to ascii sort  */
1637       else if (need_locale)
1638         {
1639           /* FIXME: consider making parameters non-const, then when
1640              both ignore and translate are NULL (which should be most
1641              of the time) we could temporarily NUL-terminate them in
1642              place and avoid the copy.  */
1643
1644           char *copy_a = (char *) alloca (lena + 1);
1645           char *copy_b = (char *) alloca (lenb + 1);
1646           int new_len_a, new_len_b, i;
1647
1648           /* We can't use strcoll directly on the two strings,
1649              but rather must extract the text for the key
1650              (to NUL-terminate for strcoll) and handle any
1651              'ignore' and/or 'translate' before comparing.   */
1652           for (new_len_a = new_len_b = i = 0; i < max (lena, lenb); i++)
1653             {
1654               if (i < lena)
1655                 {
1656                   copy_a[new_len_a] = (translate
1657                                        ? translate[UCHAR (texta[i])]
1658                                        : texta[i]);
1659                   if (!ignore || !ignore[UCHAR (texta[i])])
1660                     ++new_len_a;
1661                 }
1662               if (i < lenb)
1663                 {
1664                   copy_b[new_len_b] = (translate
1665                                        ? translate[UCHAR (textb[i])]
1666                                        : textb [i]);
1667                   if (!ignore || !ignore[UCHAR (textb[i])])
1668                     ++new_len_b;
1669                 }
1670             }
1671
1672           copy_a[new_len_a] = copy_b[new_len_b] = 0;
1673
1674           diff = strcoll (copy_a, copy_b);
1675
1676           /* Free copy_a and copy_b.  */
1677           alloca (0);
1678
1679           if (diff)
1680             return key->reverse ? -diff : diff;
1681
1682           continue;
1683         }
1684 #endif
1685       else if (ignore && translate)
1686
1687 #define CMP_WITH_IGNORE(A, B)                                           \
1688   do                                                                    \
1689     {                                                                   \
1690           while (texta < lima && textb < limb)                          \
1691             {                                                           \
1692               while (texta < lima && ignore[UCHAR (*texta)])            \
1693                 ++texta;                                                \
1694               while (textb < limb && ignore[UCHAR (*textb)])            \
1695                 ++textb;                                                \
1696               if (texta < lima && textb < limb)                         \
1697                 {                                                       \
1698                   if ((A) != (B))                                       \
1699                     {                                                   \
1700                       diff = UCHAR (A) - UCHAR (B);                     \
1701                       break;                                            \
1702                     }                                                   \
1703                   ++texta;                                              \
1704                   ++textb;                                              \
1705                 }                                                       \
1706                                                                         \
1707               if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1708                 diff = -1;                                              \
1709               else if (texta < lima && textb == limb                    \
1710                        && !ignore[UCHAR (*texta)])                      \
1711                 diff = 1;                                               \
1712             }                                                           \
1713                                                                         \
1714           if (diff == 0)                                                \
1715             {                                                           \
1716               while (texta < lima && ignore[UCHAR (*texta)])            \
1717                 ++texta;                                                \
1718               while (textb < limb && ignore[UCHAR (*textb)])            \
1719                 ++textb;                                                \
1720                                                                         \
1721               if (texta == lima && textb < limb)                        \
1722                 diff = -1;                                              \
1723               else if (texta < lima && textb == limb)                   \
1724                 diff = 1;                                               \
1725             }                                                           \
1726           /* Relative lengths are meaningless if characters were ignored.  \
1727              Handling this case here avoids what might be an invalid length  \
1728              comparison below.  */                                      \
1729           if (diff == 0 && texta == lima && textb == limb)              \
1730             comparable_lengths = 0;                                     \
1731     }                                                                   \
1732   while (0)
1733
1734         CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1735       else if (ignore)
1736         CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1737       else if (translate)
1738         while (texta < lima && textb < limb)
1739           {
1740             if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1741               {
1742                 diff = (UCHAR (translate[UCHAR (*--texta)])
1743                         - UCHAR (translate[UCHAR (*--textb)]));
1744                 break;
1745               }
1746           }
1747       else
1748         {
1749           diff = NLS_MEMCMP (texta, textb, min (lena, lenb));
1750         }
1751
1752       if (diff)
1753         return key->reverse ? -diff : diff;
1754       if (comparable_lengths && (diff = lena - lenb) != 0)
1755         return key->reverse ? -diff : diff;
1756     }
1757
1758   return 0;
1759 }
1760
1761 /* Compare two lines A and B, returning negative, zero, or positive
1762    depending on whether A compares less than, equal to, or greater than B. */
1763
1764 static int
1765 compare (register const struct line *a, register const struct line *b)
1766 {
1767   int diff, tmpa, tmpb, mini;
1768
1769   /* First try to compare on the specified keys (if any).
1770      The only two cases with no key at all are unadorned sort,
1771      and unadorned sort -r. */
1772   if (keyhead.next)
1773     {
1774       diff = keycompare (a, b);
1775       if (diff != 0)
1776         return diff;
1777       if (unique || stable)
1778         return 0;
1779     }
1780
1781   /* If the keys all compare equal (or no keys were specified)
1782      fall through to the default byte-by-byte comparison. */
1783   tmpa = a->length, tmpb = b->length;
1784   mini = min (tmpa, tmpb);
1785   if (mini == 0)
1786     diff = tmpa - tmpb;
1787   else
1788     {
1789       char *ap = a->text, *bp = b->text;
1790
1791 #ifdef ENABLE_NLS
1792       if (need_locale)  /* want absolutely correct sorting */
1793         {
1794           diff = strcoll (ap, bp);
1795           return reverse ? -diff : diff;
1796         }
1797 #endif
1798       diff = UCHAR (*ap) - UCHAR (*bp);
1799       if (diff == 0)
1800         {
1801           diff = NLS_MEMCMP (ap, bp, mini);
1802           if (diff == 0)
1803             diff = tmpa - tmpb;
1804         }
1805     }
1806
1807   return reverse ? -diff : diff;
1808 }
1809
1810 /* Check that the lines read from the given FP come in order.  Print a
1811    diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1812    the line number of the first out-of-order line (counting from 1) if they
1813    are not in order.  Otherwise, print no diagnostic and return zero.  */
1814
1815 static int
1816 checkfp (FILE *fp, const char *file_name)
1817 {
1818   struct buffer buf;            /* Input buffer. */
1819   struct lines lines;           /* Lines scanned from the buffer. */
1820   struct line temp;             /* Copy of previous line. */
1821   int cc;                       /* Character count. */
1822   int alloc;
1823   int line_number = 1;
1824   struct line *disorder_line;
1825   int disorder_line_number = 0;
1826
1827 #ifdef lint  /* Suppress `used before initialized' warning.  */
1828   disorder_line = NULL;
1829 #endif
1830
1831   initbuf (&buf, mergealloc);
1832   initlines (&lines, mergealloc / linelength + 1,
1833              LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1834   alloc = linelength;
1835   temp.text = xmalloc (alloc);
1836
1837   cc = fillbuf (&buf, fp);
1838   if (cc == 0)
1839     goto finish;
1840
1841   findlines (&buf, &lines);
1842
1843   while (1)
1844     {
1845       struct line *prev_line;   /* Pointer to previous line. */
1846       int cmp;                  /* Result of calling compare. */
1847       int i;
1848
1849       /* Compare each line in the buffer with its successor. */
1850       for (i = 0; i < lines.used - 1; ++i)
1851         {
1852           cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1853           if ((unique && cmp >= 0) || (cmp > 0))
1854             {
1855               disorder_line = &lines.lines[i + 1];
1856               disorder_line_number = line_number + i + 1;
1857               goto finish;
1858             }
1859         }
1860
1861       line_number += lines.used;
1862
1863       /* Save the last line of the buffer and refill the buffer. */
1864       prev_line = lines.lines + (lines.used - 1);
1865       if (prev_line->length + 1 > alloc)
1866         {
1867           do
1868             {
1869               alloc *= 2;
1870             }
1871           while (alloc < prev_line->length + 1);
1872           temp.text = xrealloc (temp.text, alloc);
1873         }
1874       assert (prev_line->length + 1 <= alloc);
1875       memcpy (temp.text, prev_line->text, prev_line->length + 1);
1876       temp.length = prev_line->length;
1877       temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1878       temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1879
1880       cc = fillbuf (&buf, fp);
1881       if (cc == 0)
1882         break;
1883
1884       findlines (&buf, &lines);
1885       /* Make sure the line saved from the old buffer contents is
1886          less than or equal to the first line of the new buffer. */
1887       cmp = compare (&temp, &lines.lines[0]);
1888       if ((unique && cmp >= 0) || (cmp > 0))
1889         {
1890           disorder_line = &lines.lines[0];
1891           disorder_line_number = line_number;
1892           break;
1893         }
1894     }
1895
1896 finish:
1897   xfclose (fp);
1898
1899   if (disorder_line_number)
1900     {
1901       fprintf (stderr, _("%s: %s:%d: disorder: "), program_name, file_name,
1902                disorder_line_number);
1903       write_bytes (disorder_line->text, disorder_line->length, stderr);
1904       putc (eolchar, stderr);
1905     }
1906
1907   free (buf.buf);
1908   free ((char *) lines.lines);
1909   free (temp.text);
1910   return disorder_line_number;
1911 }
1912
1913 /* Merge lines from FPS onto OFP.  NFPS cannot be greater than NMERGE.
1914    Close FPS before returning. */
1915
1916 static void
1917 mergefps (FILE **fps, register int nfps, FILE *ofp)
1918 {
1919   struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1920   struct lines lines[NMERGE];   /* Line tables for each buffer. */
1921   struct line saved;            /* Saved line for unique check. */
1922   int savedflag = 0;            /* True if there is a saved line. */
1923   int savealloc;                /* Size allocated for the saved line. */
1924   int cur[NMERGE];              /* Current line in each line table. */
1925   int ord[NMERGE];              /* Table representing a permutation of fps,
1926                                    such that lines[ord[0]].lines[cur[ord[0]]]
1927                                    is the smallest line and will be next
1928                                    output. */
1929   register int i, j, t;
1930
1931 #ifdef lint  /* Suppress `used before initialized' warning.  */
1932   savealloc = 0;
1933 #endif
1934
1935   /* Allocate space for a saved line if necessary. */
1936   if (unique)
1937     {
1938       savealloc = linelength;
1939       saved.text = xmalloc (savealloc);
1940     }
1941
1942   /* Read initial lines from each input file. */
1943   for (i = 0; i < nfps; ++i)
1944     {
1945       initbuf (&buffer[i], mergealloc);
1946       /* If a file is empty, eliminate it from future consideration. */
1947       while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1948         {
1949           xfclose (fps[i]);
1950           --nfps;
1951           for (j = i; j < nfps; ++j)
1952             fps[j] = fps[j + 1];
1953         }
1954       if (i == nfps)
1955         free (buffer[i].buf);
1956       else
1957         {
1958           initlines (&lines[i], mergealloc / linelength + 1,
1959                      LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1960           findlines (&buffer[i], &lines[i]);
1961           cur[i] = 0;
1962         }
1963     }
1964
1965   /* Set up the ord table according to comparisons among input lines.
1966      Since this only reorders two items if one is strictly greater than
1967      the other, it is stable. */
1968   for (i = 0; i < nfps; ++i)
1969     ord[i] = i;
1970   for (i = 1; i < nfps; ++i)
1971     if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1972                  &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1973       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1974
1975   /* Repeatedly output the smallest line until no input remains. */
1976   while (nfps)
1977     {
1978       /* If uniqified output is turned on, output only the first of
1979          an identical series of lines. */
1980       if (unique)
1981         {
1982           if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1983             {
1984               write_bytes (saved.text, saved.length, ofp);
1985               putc (eolchar, ofp);
1986               savedflag = 0;
1987             }
1988           if (!savedflag)
1989             {
1990               if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1991                 {
1992                   while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1993                     savealloc *= 2;
1994                   saved.text = xrealloc (saved.text, savealloc);
1995                 }
1996               saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1997               memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1998                       saved.length + 1);
1999               if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
2000                 {
2001                   saved.keybeg = saved.text +
2002                     (lines[ord[0]].lines[cur[ord[0]]].keybeg
2003                      - lines[ord[0]].lines[cur[ord[0]]].text);
2004                 }
2005               if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
2006                 {
2007                   saved.keylim = saved.text +
2008                     (lines[ord[0]].lines[cur[ord[0]]].keylim
2009                      - lines[ord[0]].lines[cur[ord[0]]].text);
2010                 }
2011               savedflag = 1;
2012             }
2013         }
2014       else
2015         {
2016           write_bytes (lines[ord[0]].lines[cur[ord[0]]].text,
2017                        lines[ord[0]].lines[cur[ord[0]]].length, ofp);
2018           putc (eolchar, ofp);
2019         }
2020
2021       /* Check if we need to read more lines into core. */
2022       if (++cur[ord[0]] == lines[ord[0]].used)
2023         {
2024           if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
2025             {
2026               findlines (&buffer[ord[0]], &lines[ord[0]]);
2027               cur[ord[0]] = 0;
2028             }
2029           else
2030             {
2031               /* We reached EOF on fps[ord[0]]. */
2032               for (i = 1; i < nfps; ++i)
2033                 if (ord[i] > ord[0])
2034                   --ord[i];
2035               --nfps;
2036               xfclose (fps[ord[0]]);
2037               free (buffer[ord[0]].buf);
2038               free ((char *) lines[ord[0]].lines);
2039               for (i = ord[0]; i < nfps; ++i)
2040                 {
2041                   fps[i] = fps[i + 1];
2042                   buffer[i] = buffer[i + 1];
2043                   lines[i] = lines[i + 1];
2044                   cur[i] = cur[i + 1];
2045                 }
2046               for (i = 0; i < nfps; ++i)
2047                 ord[i] = ord[i + 1];
2048               continue;
2049             }
2050         }
2051
2052       /* The new line just read in may be larger than other lines
2053          already in core; push it back in the queue until we encounter
2054          a line larger than it. */
2055       for (i = 1; i < nfps; ++i)
2056         {
2057           t = compare (&lines[ord[0]].lines[cur[ord[0]]],
2058                        &lines[ord[i]].lines[cur[ord[i]]]);
2059           if (!t)
2060             t = ord[0] - ord[i];
2061           if (t < 0)
2062             break;
2063         }
2064       t = ord[0];
2065       for (j = 1; j < i; ++j)
2066         ord[j - 1] = ord[j];
2067       ord[i - 1] = t;
2068     }
2069
2070   if (unique && savedflag)
2071     {
2072       write_bytes (saved.text, saved.length, ofp);
2073       putc (eolchar, ofp);
2074       free (saved.text);
2075     }
2076 }
2077
2078 #ifdef ENABLE_NLS
2079
2080 /* Find the numeric format that this file represents to us for sorting. */
2081 static void
2082 nls_numeric_format (const struct line *line, int nlines)
2083 {
2084   struct nls_keyfield *n_key = nls_keyhead;
2085
2086   /* line = first line, nlines = number of lines,
2087      nls_fraction_found = false                           */
2088   for (; !nls_fraction_found && nlines > 0; line++, nlines--)
2089     {
2090       int iter;
2091       for (iter = 0; !nls_fraction_found; iter++)
2092         {
2093           char *text;
2094           char *lim;
2095           struct keyfield *key = n_key->key;
2096
2097           /* text = {}, lim = {}, key = first key */
2098           if (iter || line->keybeg == NULL)
2099             {
2100               /* Succeding keys, where the key field is
2101                  specified                              */
2102               if (key->eword >= 0) /* key->eword = length of key */
2103                 lim = limfield (line, key);
2104               else
2105                 lim = line->text + line->length;
2106               /* lim = end of key field */
2107
2108               if (key->sword >= 0) /* key->sword = start of key */
2109                 text = begfield (line, key);
2110               else
2111                 text = line->text;
2112               /* text = start of field */
2113             }
2114           else
2115             {
2116               /* First key is always the whole line */
2117               text = line->keybeg;
2118               lim = line->keylim;
2119             }
2120           /* text = start of text to sort
2121              lim  = end of text to sort    */
2122
2123           look_for_fraction (text, lim);
2124
2125           /* nls_fraction_found = decimal_point found? */
2126
2127           if ((n_key = n_key->next) == nls_keyhead)
2128             break;  /* No more keys for this line */
2129         }
2130     }
2131   nls_fraction_found = 1;
2132   /* decide on current decimal_point known */
2133 }
2134
2135 #endif
2136
2137 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
2138
2139 static void
2140 sortlines (struct line *lines, int nlines, struct line *temp)
2141 {
2142   register struct line *lo, *hi, *t;
2143   register int nlo, nhi;
2144
2145   if (nlines == 2)
2146     {
2147       if (compare (&lines[0], &lines[1]) > 0)
2148         {
2149           *temp = lines[0];
2150           lines[0] = lines[1];
2151           lines[1] = *temp;
2152         }
2153       return;
2154     }
2155
2156   nlo = nlines / 2;
2157   lo = lines;
2158   nhi = nlines - nlo;
2159   hi = lines + nlo;
2160
2161   if (nlo > 1)
2162     sortlines (lo, nlo, temp);
2163
2164   if (nhi > 1)
2165     sortlines (hi, nhi, temp);
2166
2167   t = temp;
2168
2169   while (nlo && nhi)
2170     if (compare (lo, hi) <= 0)
2171       *t++ = *lo++, --nlo;
2172     else
2173       *t++ = *hi++, --nhi;
2174   while (nlo--)
2175     *t++ = *lo++;
2176
2177   for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
2178     *lo++ = *t++;
2179 }
2180
2181 /* Check that each of the NFILES FILES is ordered.
2182    Return a count of disordered files. */
2183
2184 static int
2185 check (char **files, int nfiles)
2186 {
2187   int i, disorders = 0;
2188   FILE *fp;
2189
2190   for (i = 0; i < nfiles; ++i)
2191     {
2192       fp = xfopen (files[i], "r");
2193       if (checkfp (fp, files[i]))
2194         {
2195           ++disorders;
2196         }
2197     }
2198   return disorders;
2199 }
2200
2201 /* Merge NFILES FILES onto OFP. */
2202
2203 static void
2204 merge (char **files, int nfiles, FILE *ofp)
2205 {
2206   int i, j, t;
2207   char *temp;
2208   FILE *fps[NMERGE], *tfp;
2209
2210   while (nfiles > NMERGE)
2211     {
2212       t = 0;
2213       for (i = 0; i < nfiles / NMERGE; ++i)
2214         {
2215           for (j = 0; j < NMERGE; ++j)
2216             fps[j] = xfopen (files[i * NMERGE + j], "r");
2217           tfp = xtmpfopen (temp = tempname ());
2218           mergefps (fps, NMERGE, tfp);
2219           xfclose (tfp);
2220           for (j = 0; j < NMERGE; ++j)
2221             zaptemp (files[i * NMERGE + j]);
2222           files[t++] = temp;
2223         }
2224       for (j = 0; j < nfiles % NMERGE; ++j)
2225         fps[j] = xfopen (files[i * NMERGE + j], "r");
2226       tfp = xtmpfopen (temp = tempname ());
2227       mergefps (fps, nfiles % NMERGE, tfp);
2228       xfclose (tfp);
2229       for (j = 0; j < nfiles % NMERGE; ++j)
2230         zaptemp (files[i * NMERGE + j]);
2231       files[t++] = temp;
2232       nfiles = t;
2233     }
2234
2235   for (i = 0; i < nfiles; ++i)
2236     fps[i] = xfopen (files[i], "r");
2237   mergefps (fps, i, ofp);
2238   for (i = 0; i < nfiles; ++i)
2239     zaptemp (files[i]);
2240 }
2241
2242 /* Sort NFILES FILES onto OFP. */
2243
2244 static void
2245 sort (char **files, int nfiles, FILE *ofp)
2246 {
2247   struct buffer buf;
2248   struct lines lines;
2249   struct line *tmp;
2250   int i, ntmp;
2251   FILE *fp, *tfp;
2252   struct tempnode *node;
2253   int n_temp_files = 0;
2254   char **tempfiles;
2255
2256   initbuf (&buf, sortalloc);
2257   initlines (&lines, sortalloc / linelength + 1,
2258              LINEALLOC / sizeof (struct line));
2259   ntmp = lines.alloc;
2260   tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
2261
2262   while (nfiles--)
2263     {
2264       fp = xfopen (*files++, "r");
2265       while (fillbuf (&buf, fp))
2266         {
2267           findlines (&buf, &lines);
2268           if (lines.used > ntmp)
2269             {
2270               while (lines.used > ntmp)
2271                 ntmp *= 2;
2272               tmp = (struct line *)
2273                 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
2274             }
2275 #ifdef ENABLE_NLS
2276           if (nls_keyhead)
2277             nls_keyhead = nls_keyhead->next;
2278           if (!nls_fraction_found && nls_keyhead)
2279             nls_numeric_format (lines.lines, lines.used);
2280 #endif
2281           sortlines (lines.lines, lines.used, tmp);
2282           if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
2283             {
2284               tfp = ofp;
2285             }
2286           else
2287             {
2288               ++n_temp_files;
2289               tfp = xtmpfopen (tempname ());
2290             }
2291           for (i = 0; i < lines.used; ++i)
2292             if (!unique || i == 0
2293                 || compare (&lines.lines[i], &lines.lines[i - 1]))
2294               {
2295                 write_bytes (lines.lines[i].text, lines.lines[i].length, tfp);
2296                 putc (eolchar, tfp);
2297               }
2298           if (tfp != ofp)
2299             xfclose (tfp);
2300         }
2301       xfclose (fp);
2302     }
2303
2304   free (buf.buf);
2305   free ((char *) lines.lines);
2306   free ((char *) tmp);
2307
2308   if (n_temp_files)
2309     {
2310       tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
2311       i = n_temp_files;
2312       for (node = temphead.next; i > 0; node = node->next)
2313         tempfiles[--i] = node->name;
2314       merge (tempfiles, n_temp_files, ofp);
2315       free ((char *) tempfiles);
2316     }
2317 }
2318
2319 /* Insert key KEY at the end of the list (`keyhead'). */
2320
2321 static void
2322 insertkey (struct keyfield *key)
2323 {
2324   struct keyfield *k = &keyhead;
2325
2326   while (k->next)
2327     k = k->next;
2328   k->next = key;
2329   key->next = NULL;
2330 #ifdef ENABLE_NLS
2331   if (key->numeric || key->general_numeric)
2332     {
2333       struct nls_keyfield *nk;
2334
2335       nk = (struct nls_keyfield *) xmalloc (sizeof (struct nls_keyfield));
2336       nk->key = key;
2337       if (nls_keyhead)
2338         {
2339           nk->next = nls_keyhead->next;
2340           nls_keyhead->next = nk;
2341         }
2342       else
2343         nk->next = nk;
2344       nls_keyhead = nk;
2345     }
2346 #endif
2347 }
2348
2349 static void
2350 badfieldspec (const char *s)
2351 {
2352   error (SORT_FAILURE, 0, _("invalid field specification `%s'"), s);
2353 }
2354
2355 /* Handle interrupts and hangups. */
2356
2357 static void
2358 sighandler (int sig)
2359 {
2360 #ifdef SA_INTERRUPT
2361   struct sigaction sigact;
2362
2363   sigact.sa_handler = SIG_DFL;
2364   sigemptyset (&sigact.sa_mask);
2365   sigact.sa_flags = 0;
2366   sigaction (sig, &sigact, NULL);
2367 #else                           /* !SA_INTERRUPT */
2368   signal (sig, SIG_DFL);
2369 #endif                          /* SA_INTERRUPT */
2370   cleanup ();
2371   kill (getpid (), sig);
2372 }
2373
2374 /* Set the ordering options for KEY specified in S.
2375    Return the address of the first character in S that
2376    is not a valid ordering option.
2377    BLANKTYPE is the kind of blanks that 'b' should skip. */
2378
2379 static char *
2380 set_ordering (register const char *s, struct keyfield *key,
2381               enum blanktype blanktype)
2382 {
2383   while (*s)
2384     {
2385       switch (*s)
2386         {
2387         case 'b':
2388           if (blanktype == bl_start || blanktype == bl_both)
2389             key->skipsblanks = 1;
2390           if (blanktype == bl_end || blanktype == bl_both)
2391             key->skipeblanks = 1;
2392           break;
2393         case 'd':
2394           key->ignore = nondictionary;
2395           break;
2396         case 'f':
2397           key->translate = fold_toupper;
2398           break;
2399         case 'g':
2400           key->general_numeric = 1;
2401           break;
2402         case 'i':
2403           key->ignore = nonprinting;
2404           break;
2405         case 'M':
2406           key->month = 1;
2407           break;
2408         case 'n':
2409           key->numeric = 1;
2410           break;
2411         case 'r':
2412           key->reverse = 1;
2413           break;
2414         default:
2415           return (char *) s;
2416         }
2417       ++s;
2418     }
2419   return (char *) s;
2420 }
2421
2422 static void
2423 key_init (struct keyfield *key)
2424 {
2425   memset (key, 0, sizeof (*key));
2426   key->eword = -1;
2427 }
2428
2429 /* strdup and return the result of setlocale, but guard against a NULL
2430    return value.  If setlocale returns NULL, strdup FAIL_VAL instead.  */
2431
2432 #if defined ENABLE_NLS && ( !defined __GLIBC__ || __GLIBC__ < 2 )
2433 static inline char *
2434 my_setlocale (const char *locale, const char *fail_val)
2435 {
2436   char *s = setlocale (LC_ALL, locale);
2437   if (s == NULL)
2438     s = (char *) fail_val;
2439   return xstrdup (s);
2440 }
2441 #endif
2442
2443 int
2444 main (int argc, char **argv)
2445 {
2446   struct keyfield *key = NULL, gkey;
2447   char *s;
2448   int i, t, t2;
2449   int checkonly = 0, mergeonly = 0, nfiles = 0;
2450   char *minus = "-", *outfile = minus, **files, *tmp;
2451   FILE *ofp;
2452 #ifdef SA_INTERRUPT
2453   struct sigaction oldact, newact;
2454 #endif                          /* SA_INTERRUPT */
2455
2456   program_name = argv[0];
2457
2458 #ifdef ENABLE_NLS
2459
2460   /* Determine whether the current locale is C or POSIX.  */
2461 # if defined __GLIBC__ && __GLIBC__ >= 2
2462   s = setlocale (LC_ALL, "");
2463   if (s != NULL && !STREQ (s, "C") && !STREQ (s, "POSIX"))
2464     /* The current locale is neither C nor POSIX.  We'll need to do
2465        more work.  */
2466     need_locale = 1;
2467 # else
2468   {
2469     char *c_locale_string = my_setlocale ("C", "");
2470     char *posix_locale_string = my_setlocale ("POSIX", "");
2471     char *current_locale_string = setlocale (LC_ALL, "");
2472
2473     if (current_locale_string != NULL
2474         && !STREQ (current_locale_string, c_locale_string)
2475         && !STREQ (current_locale_string, posix_locale_string))
2476         /* The current locale is neither C nor POSIX.
2477            We'll need to do more work.  */
2478         need_locale = 1;
2479
2480     free (c_locale_string);
2481     free (posix_locale_string);
2482   }
2483 # endif
2484
2485   /* Let's get locale's representation of the decimal point */
2486   {
2487     struct lconv *lconvp = localeconv ();
2488
2489     decimal_point = *lconvp->decimal_point;
2490     th_sep        = *lconvp->thousands_sep;
2491     nls_grouping  =  (char *) (lconvp->grouping);
2492   }
2493
2494   /* if locale doesn't define a decimal point, we'll use the
2495      US notation.                                            */
2496   if (decimal_point == '\0')
2497     decimal_point = FLOATING_POINT;
2498   else
2499     nls_fraction_found = 0;  /* Figure out which decimal point to use  */
2500
2501   nls_month_found = 0;  /* Figure out which month notation to use */
2502
2503   monthtab = nls_monthtab;
2504
2505 #endif /* NLS */
2506
2507   bindtextdomain (PACKAGE, LOCALEDIR);
2508   textdomain (PACKAGE);
2509
2510   parse_long_options (argc, argv, "sort", GNU_PACKAGE, VERSION, usage);
2511
2512   have_read_stdin = 0;
2513   inittables ();
2514
2515   temp_file_prefix = getenv ("TMPDIR");
2516   if (temp_file_prefix == NULL)
2517     temp_file_prefix = DEFAULT_TMPDIR;
2518
2519   /* Change the way xmalloc and xrealloc fail.  */
2520   xalloc_exit_failure = SORT_FAILURE;
2521   xalloc_fail_func = cleanup;
2522
2523 #ifdef SA_INTERRUPT
2524   newact.sa_handler = sighandler;
2525   sigemptyset (&newact.sa_mask);
2526   newact.sa_flags = 0;
2527
2528   sigaction (SIGINT, NULL, &oldact);
2529   if (oldact.sa_handler != SIG_IGN)
2530     sigaction (SIGINT, &newact, NULL);
2531   sigaction (SIGHUP, NULL, &oldact);
2532   if (oldact.sa_handler != SIG_IGN)
2533     sigaction (SIGHUP, &newact, NULL);
2534   sigaction (SIGPIPE, NULL, &oldact);
2535   if (oldact.sa_handler != SIG_IGN)
2536     sigaction (SIGPIPE, &newact, NULL);
2537   sigaction (SIGTERM, NULL, &oldact);
2538   if (oldact.sa_handler != SIG_IGN)
2539     sigaction (SIGTERM, &newact, NULL);
2540 #else                           /* !SA_INTERRUPT */
2541   if (signal (SIGINT, SIG_IGN) != SIG_IGN)
2542     signal (SIGINT, sighandler);
2543   if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
2544     signal (SIGHUP, sighandler);
2545   if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
2546     signal (SIGPIPE, sighandler);
2547   if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
2548     signal (SIGTERM, sighandler);
2549 #endif                          /* !SA_INTERRUPT */
2550
2551   gkey.sword = gkey.eword = -1;
2552   gkey.ignore = NULL;
2553   gkey.translate = NULL;
2554   gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
2555   gkey.skipsblanks = gkey.skipeblanks = 0;
2556
2557   files = (char **) xmalloc (sizeof (char *) * argc);
2558
2559   for (i = 1; i < argc; ++i)
2560     {
2561       if (argv[i][0] == '+')
2562         {
2563           if (key)
2564             insertkey (key);
2565           key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
2566           key_init (key);
2567           s = argv[i] + 1;
2568           if (! (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1]))))
2569             badfieldspec (argv[i]);
2570           for (t = 0; ISDIGIT (*s); ++s)
2571             t = 10 * t + *s - '0';
2572           t2 = 0;
2573           if (*s == '.')
2574             for (++s; ISDIGIT (*s); ++s)
2575               t2 = 10 * t2 + *s - '0';
2576           if (t2 || t)
2577             {
2578               key->sword = t;
2579               key->schar = t2;
2580             }
2581           else
2582             key->sword = -1;
2583           s = set_ordering (s, key, bl_start);
2584           if (*s)
2585             badfieldspec (argv[i]);
2586         }
2587       else if (argv[i][0] == '-' && argv[i][1])
2588         {
2589           s = argv[i] + 1;
2590           if (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1])))
2591             {
2592               if (!key)
2593                 {
2594                   /* Provoke with `sort -9'.  */
2595                   error (0, 0, _("when using the old-style +POS and -POS \
2596 key specifiers,\nthe +POS specifier must come first"));
2597                   usage (SORT_FAILURE);
2598                 }
2599               for (t = 0; ISDIGIT (*s); ++s)
2600                 t = t * 10 + *s - '0';
2601               t2 = 0;
2602               if (*s == '.')
2603                 for (++s; ISDIGIT (*s); ++s)
2604                   t2 = t2 * 10 + *s - '0';
2605               key->eword = t;
2606               key->echar = t2;
2607               s = set_ordering (s, key, bl_end);
2608               if (*s)
2609                 badfieldspec (argv[i]);
2610               insertkey (key);
2611               key = NULL;
2612             }
2613           else
2614             while (*s)
2615               {
2616                 s = set_ordering (s, &gkey, bl_both);
2617                 switch (*s)
2618                   {
2619                   case '\0':
2620                     break;
2621                   case 'c':
2622                     checkonly = 1;
2623                     break;
2624                   case 'k':
2625                     if (s[1])
2626                       ++s;
2627                     else
2628                       {
2629                         if (i == argc - 1)
2630                           error (SORT_FAILURE, 0,
2631                                  _("option `-k' requires an argument"));
2632                         else
2633                           s = argv[++i];
2634                       }
2635                     if (key)
2636                       insertkey (key);
2637                     key = (struct keyfield *)
2638                       xmalloc (sizeof (struct keyfield));
2639                     key_init (key);
2640                     /* Get POS1. */
2641                     if (!ISDIGIT (*s))
2642                       badfieldspec (argv[i]);
2643                     for (t = 0; ISDIGIT (*s); ++s)
2644                       t = 10 * t + *s - '0';
2645                     if (t == 0)
2646                       {
2647                         /* Provoke with `sort -k0' */
2648                         error (0, 0, _("the starting field number argument \
2649 to the `-k' option must be positive"));
2650                         badfieldspec (argv[i]);
2651                       }
2652                     --t;
2653                     t2 = 0;
2654                     if (*s == '.')
2655                       {
2656                         if (!ISDIGIT (s[1]))
2657                           {
2658                             /* Provoke with `sort -k1.' */
2659                             error (0, 0, _("starting field spec has `.' but \
2660 lacks following character offset"));
2661                             badfieldspec (argv[i]);
2662                           }
2663                         for (++s; ISDIGIT (*s); ++s)
2664                           t2 = 10 * t2 + *s - '0';
2665                         if (t2 == 0)
2666                           {
2667                             /* Provoke with `sort -k1.0' */
2668                             error (0, 0, _("starting field character offset \
2669 argument to the `-k' option\nmust be positive"));
2670                             badfieldspec (argv[i]);
2671                           }
2672                         --t2;
2673                       }
2674                     if (t2 || t)
2675                       {
2676                         key->sword = t;
2677                         key->schar = t2;
2678                       }
2679                     else
2680                       key->sword = -1;
2681                     s = set_ordering (s, key, bl_start);
2682                     if (*s == 0)
2683                       {
2684                         key->eword = -1;
2685                         key->echar = 0;
2686                       }
2687                     else if (*s != ',')
2688                       badfieldspec (argv[i]);
2689                     else if (*s == ',')
2690                       {
2691                         /* Skip over comma.  */
2692                         ++s;
2693                         if (*s == 0)
2694                           {
2695                             /* Provoke with `sort -k1,' */
2696                             error (0, 0, _("field specification has `,' but \
2697 lacks following field spec"));
2698                             badfieldspec (argv[i]);
2699                           }
2700                         /* Get POS2. */
2701                         for (t = 0; ISDIGIT (*s); ++s)
2702                           t = t * 10 + *s - '0';
2703                         if (t == 0)
2704                           {
2705                             /* Provoke with `sort -k1,0' */
2706                             error (0, 0, _("ending field number argument \
2707 to the `-k' option must be positive"));
2708                             badfieldspec (argv[i]);
2709                           }
2710                         --t;
2711                         t2 = 0;
2712                         if (*s == '.')
2713                           {
2714                             if (!ISDIGIT (s[1]))
2715                               {
2716                                 /* Provoke with `sort -k1,1.' */
2717                                 error (0, 0, _("ending field spec has `.' \
2718 but lacks following character offset"));
2719                                 badfieldspec (argv[i]);
2720                               }
2721                             for (++s; ISDIGIT (*s); ++s)
2722                               t2 = t2 * 10 + *s - '0';
2723                           }
2724                         else
2725                           {
2726                             /* `-k 2,3' is equivalent to `+1 -3'.  */
2727                             ++t;
2728                           }
2729                         key->eword = t;
2730                         key->echar = t2;
2731                         s = set_ordering (s, key, bl_end);
2732                         if (*s)
2733                           badfieldspec (argv[i]);
2734                       }
2735                     insertkey (key);
2736                     key = NULL;
2737                     goto outer;
2738                   case 'm':
2739                     mergeonly = 1;
2740                     break;
2741                   case 'o':
2742                     if (s[1])
2743                       outfile = s + 1;
2744                     else
2745                       {
2746                         if (i == argc - 1)
2747                           error (SORT_FAILURE, 0,
2748                                  _("option `-o' requires an argument"));
2749                         else
2750                           outfile = argv[++i];
2751                       }
2752                     goto outer;
2753                   case 's':
2754                     stable = 1;
2755                     break;
2756                   case 't':
2757                     if (s[1])
2758                       tab = *++s;
2759                     else if (i < argc - 1)
2760                       {
2761                         tab = *argv[++i];
2762                         goto outer;
2763                       }
2764                     else
2765                       error (SORT_FAILURE, 0,
2766                              _("option `-t' requires an argument"));
2767                     break;
2768                   case 'T':
2769                     if (s[1])
2770                       temp_file_prefix = ++s;
2771                     else
2772                       {
2773                         if (i < argc - 1)
2774                           temp_file_prefix = argv[++i];
2775                         else
2776                           error (SORT_FAILURE, 0,
2777                                  _("option `-T' requires an argument"));
2778                       }
2779                     goto outer;
2780                     /* break; */
2781                   case 'u':
2782                     unique = 1;
2783                     break;
2784                   case 'z':
2785                     eolchar = 0;
2786                     break;
2787                   case 'y':
2788                     /* Accept and ignore e.g. -y0 for compatibility with
2789                        Solaris 2.  */
2790                     goto outer;
2791                   default:
2792                     fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2793                              argv[0], *s);
2794                     usage (SORT_FAILURE);
2795                   }
2796                 if (*s)
2797                   ++s;
2798               }
2799         }
2800       else                      /* Not an option. */
2801         {
2802           files[nfiles++] = argv[i];
2803         }
2804     outer:;
2805     }
2806
2807   if (key)
2808     insertkey (key);
2809
2810   /* Inheritance of global options to individual keys. */
2811   for (key = keyhead.next; key; key = key->next)
2812     if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2813         && !key->skipeblanks && !key->month && !key->numeric
2814         && !key->general_numeric)
2815       {
2816         key->ignore = gkey.ignore;
2817         key->translate = gkey.translate;
2818         key->skipsblanks = gkey.skipsblanks;
2819         key->skipeblanks = gkey.skipeblanks;
2820         key->month = gkey.month;
2821         key->numeric = gkey.numeric;
2822         key->general_numeric = gkey.general_numeric;
2823         key->reverse = gkey.reverse;
2824       }
2825
2826   if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2827                         || gkey.skipeblanks || gkey.month || gkey.numeric
2828                         || gkey.general_numeric))
2829     insertkey (&gkey);
2830   reverse = gkey.reverse;
2831
2832   if (nfiles == 0)
2833     {
2834       nfiles = 1;
2835       files = &minus;
2836     }
2837
2838   if (checkonly)
2839     {
2840       /* POSIX requires that sort return 1 IFF invoked with -c and the
2841          input is not properly sorted.  */
2842       exit (check (files, nfiles) == 0 ? 0 : 1);
2843     }
2844
2845   if (!STREQ (outfile, "-"))
2846     {
2847       struct stat outstat;
2848       if (stat (outfile, &outstat) == 0)
2849         {
2850           /* The following code prevents a race condition when
2851              people use the brain dead shell programming idiom:
2852                   cat file | sort -o file
2853              This feature is provided for historical compatibility,
2854              but we strongly discourage ever relying on this in
2855              new shell programs. */
2856
2857           /* Temporarily copy each input file that might be another name
2858              for the output file.  When in doubt (e.g. a pipe), copy.  */
2859           for (i = 0; i < nfiles; ++i)
2860             {
2861               char buf[8192];
2862               FILE *in_fp;
2863               FILE *out_fp;
2864               int cc;
2865
2866               if (S_ISREG (outstat.st_mode) && !STREQ (outfile, files[i]))
2867                 {
2868                   struct stat instat;
2869                   if ((STREQ (files[i], "-")
2870                        ? fstat (STDIN_FILENO, &instat)
2871                        : stat (files[i], &instat)) != 0)
2872                     {
2873                       error (0, errno, "%s", files[i]);
2874                       cleanup ();
2875                       exit (SORT_FAILURE);
2876                     }
2877                   if (S_ISREG (instat.st_mode) && !SAME_INODE (instat, outstat))
2878                     {
2879                       /* We know the files are distinct.  */
2880                       continue;
2881                     }
2882                 }
2883
2884               in_fp = xfopen (files[i], "r");
2885               tmp = tempname ();
2886               out_fp = xtmpfopen (tmp);
2887               /* FIXME: maybe use copy.c(copy) here. */
2888               while ((cc = fread (buf, 1, sizeof buf, in_fp)) > 0)
2889                 write_bytes (buf, cc, out_fp);
2890               if (ferror (in_fp))
2891                 {
2892                   error (0, errno, "%s", files[i]);
2893                   cleanup ();
2894                   exit (SORT_FAILURE);
2895                 }
2896               xfclose (out_fp);
2897               xfclose (in_fp);
2898               files[i] = tmp;
2899             }
2900           ofp = xfopen (outfile, "w");
2901         }
2902       else
2903         {
2904           /* A non-`-' outfile was specified, but the file doesn't yet exist.
2905              Before opening it for writing (thus creating it), make sure all
2906              of the input files exist.  Otherwise, creating the output file
2907              could create an otherwise missing input file, making sort succeed
2908              when it should fail.  */
2909           for (i = 0; i < nfiles; ++i)
2910             {
2911               struct stat sb;
2912               if (STREQ (files[i], "-"))
2913                 continue;
2914               if (stat (files[i], &sb))
2915                 {
2916                   error (0, errno, "%s", files[i]);
2917                   cleanup ();
2918                   exit (SORT_FAILURE);
2919                 }
2920             }
2921
2922           ofp = xfopen (outfile, "w");
2923         }
2924     }
2925   else
2926     {
2927       ofp = stdout;
2928     }
2929
2930   if (mergeonly)
2931     merge (files, nfiles, ofp);
2932   else
2933     sort (files, nfiles, ofp);
2934   cleanup ();
2935
2936   /* If we wait for the implicit flush on exit, and the parent process
2937      has closed stdout (e.g., exec >&- in a shell), then the output file
2938      winds up empty.  I don't understand why.  This is under SunOS,
2939      Solaris, Ultrix, and Irix.  This premature fflush makes the output
2940      reappear. --karl@cs.umb.edu  */
2941   if (fflush (ofp) < 0)
2942     error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2943
2944   if (have_read_stdin && fclose (stdin) == EOF)
2945     error (SORT_FAILURE, errno, outfile);
2946   if (ferror (stdout) || fclose (stdout) == EOF)
2947     error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2948
2949   exit (EXIT_SUCCESS);
2950 }