src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 88, 1991-1999 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software
  16    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17
  18    Written December 1988 by Mike Haertel.
  19    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  20    or (US mail) as Mike Haertel c/o Free Software Foundation.
  21
  22    Ørn E. Hansen added NLS support in 1997.  */
  23
  24 #include <config.h>
  25
  26 #include <sys/types.h>
  27 #include <signal.h>
  28 #include <stdio.h>
  29 #include <assert.h>
  30 #include "system.h"
  31 #include "long-options.h"
  32 #include "error.h"
  33 #include "xstrtod.h"
  34 #include "xalloc.h"
  35
  36 /* The official name of this program (e.g., no `g' prefix).  */
  37 #define PROGRAM_NAME "sort"
  38
  39 #define AUTHORS "Mike Haertel"
  40
  41 #if defined ENABLE_NLS && HAVE_LANGINFO_H
  42 # include <langinfo.h>
  43 #endif
  44
  45 #if HAVE_PATHCONF && defined _PC_NAME_MAX
  46 # define NAME_MAX_IN_DIR(Dir) pathconf (Dir, _PC_NAME_MAX)
  47 #else
  48 # define NAME_MAX_IN_DIR(Dir) 255
  49 #endif
  50
  51 char *xstrdup ();
  52
  53 /* Undefine, to avoid warning about redefinition on some systems.  */
  54 #undef min
  55 #define min(a, b) ((a) < (b) ? (a) : (b))
  56 #undef max
  57 #define max(a, b) ((a) > (b) ? (a) : (b))
  58
  59 #define UCHAR_LIM (UCHAR_MAX + 1)
  60 #define UCHAR(c) ((unsigned char) (c))
  61
  62 #ifndef DEFAULT_TMPDIR
  63 # define DEFAULT_TMPDIR "/tmp"
  64 #endif
  65
  66 /* Use this as exit status in case of error, not EXIT_FAILURE.  This
  67    is necessary because EXIT_FAILURE is usually 1 and POSIX requires
  68    that sort exit with status 1 IFF invoked with -c and the input is
  69    not properly sorted.  Any other irregular exit must exit with a
  70    status code greater than 1.  */
  71 #define SORT_FAILURE 2
  72
  73 #define FLOATING_POINT  '.'
  74 #define FLOATING_COMMA  ','
  75 #define NEGATION_SIGN   '-'
  76 #define NUMERIC_ZERO    '0'
  77
  78 #ifdef ENABLE_NLS
  79 # define NLS_MEMCMP(S1, S2, Len) strncoll (S1, S2, Len)
  80 # define NLS_STRNCMP(S1, S2, Len) strncoll_s2_readonly (S1, S2, Len)
  81 #else
  82 # define NLS_MEMCMP(S1, S2, Len) memcmp (S1, S2, Len)
  83 # define NLS_STRNCMP(S1, S2, Len) strncmp (S1, S2, Len)
  84 #endif
  85
  86 #ifdef ENABLE_NLS
  87
  88 static unsigned char decimal_point;
  89 static unsigned char th_sep;
  90 static char *nls_grouping;
  91
  92 /* This is "C" locale, need another? */
  93 static int need_locale = 0;
  94
  95 /* Should we look for decimal point? */
  96 static int nls_fraction_found = 1;
  97
  98 /* Look for month notations in text? */
  99 static int nls_month_found = 1;
 100
 101 #else
 102
 103 # define decimal_point FLOATING_POINT
 104
 105 #endif
 106
 107 /* If native language support is requested, make a 1-1 map to the
 108    locale character map, otherwise ensure normal behavior.  */
 109 #ifdef ENABLE_NLS
 110
 111 /* 12 months in a year */
 112 # define NLS_NUM_MONTHS 12
 113
 114 /* Maximum number of elements, to allocate per allocation unit  */
 115 # define NLS_MAX_GROUPS 8
 116
 117 /* A string with one character, to enforce char collation */
 118 # define NLS_ONE_CHARACTER_STRING " "
 119
 120 #endif
 121
 122 /* The kind of blanks for '-b' to skip in various options. */
 123 enum blanktype { bl_start, bl_end, bl_both };
 124
 125 /* The character marking end of line. Default to \n. */
 126 int eolchar = '\n';
 127
 128 /* Lines are held in core as counted strings. */
 129 struct line
 130 {
 131   char *text;                   /* Text of the line. */
 132   int length;                   /* Length not including final newline. */
 133   char *keybeg;                 /* Start of first key. */
 134   char *keylim;                 /* Limit of first key. */
 135 };
 136
 137 /* Arrays of lines. */
 138 struct lines
 139 {
 140   struct line *lines;           /* Dynamically allocated array of lines. */
 141   int used;                     /* Number of slots used. */
 142   int alloc;                    /* Number of slots allocated. */
 143   int limit;                    /* Max number of slots to allocate.  */
 144 };
 145
 146 /* Input buffers. */
 147 struct buffer
 148 {
 149   char *buf;                    /* Dynamically allocated buffer. */
 150   int used;                     /* Number of bytes used. */
 151   int alloc;                    /* Number of bytes allocated. */
 152   int left;                     /* Number of bytes left after line parsing. */
 153 };
 154
 155 struct keyfield
 156 {
 157   int sword;                    /* Zero-origin 'word' to start at. */
 158   int schar;                    /* Additional characters to skip. */
 159   int skipsblanks;              /* Skip leading white space at start. */
 160   int eword;                    /* Zero-origin first word after field. */
 161   int echar;                    /* Additional characters in field. */
 162   int skipeblanks;              /* Skip trailing white space at finish. */
 163   int *ignore;                  /* Boolean array of characters to ignore. */
 164   char *translate;              /* Translation applied to characters. */
 165   int numeric;                  /* Flag for numeric comparison.  Handle
 166                                    strings of digits with optional decimal
 167                                    point, but no exponential notation. */
 168   int general_numeric;          /* Flag for general, numeric comparison.
 169                                    Handle numbers in exponential notation. */
 170   int month;                    /* Flag for comparison by month name. */
 171   int reverse;                  /* Reverse the sense of comparison. */
 172   struct keyfield *next;        /* Next keyfield to try. */
 173 };
 174
 175 struct month
 176 {
 177   char *name;
 178   int val;
 179 };
 180
 181 /* The name this program was run with. */
 182 char *program_name;
 183
 184 /* Table of white space. */
 185 static int blanks[UCHAR_LIM];
 186
 187 /* Table of non-printing characters. */
 188 static int nonprinting[UCHAR_LIM];
 189
 190 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 191 static int nondictionary[UCHAR_LIM];
 192
 193 /* Translation table folding lower case to upper. */
 194 static char fold_toupper[UCHAR_LIM];
 195
 196 /* Table mapping 3-letter month names to integers.
 197    Alphabetic order allows binary search. */
 198 static const struct month us_monthtab[] =
 199 {
 200   {"APR", 4},
 201   {"AUG", 8},
 202   {"DEC", 12},
 203   {"FEB", 2},
 204   {"JAN", 1},
 205   {"JUL", 7},
 206   {"JUN", 6},
 207   {"MAR", 3},
 208   {"MAY", 5},
 209   {"NOV", 11},
 210   {"OCT", 10},
 211   {"SEP", 9}
 212 };
 213
 214 #ifdef ENABLE_NLS
 215
 216 /* Locale may have a different idea of month names   */
 217 static struct month nls_monthtab[NLS_NUM_MONTHS];
 218 static int nls_months_collide[NLS_NUM_MONTHS + 1];
 219
 220 /* Numeric keys, to search for numeric format */
 221 struct nls_keyfield
 222 {
 223   struct keyfield *key;
 224   struct nls_keyfield *next;
 225 };
 226
 227 static struct nls_keyfield *nls_keyhead = NULL;
 228
 229 #endif
 230
 231 /* Which month table to use in the program, default C */
 232 static const struct month *monthtab = us_monthtab;
 233
 234 /* During the merge phase, the number of files to merge at once. */
 235 #define NMERGE 16
 236
 237 /* Initial buffer size for in core sorting.  Will not grow unless a
 238    line longer than this is seen. */
 239 static int sortalloc = 512 * 1024;
 240
 241 /* Initial buffer size for in core merge buffers.  Bear in mind that
 242    up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
 243 static int mergealloc = 16 * 1024;
 244
 245 /* Guess of average line length. */
 246 static int linelength = 30;
 247
 248 /* Maximum number of elements for the array(s) of struct line's, in bytes.  */
 249 #define LINEALLOC (256 * 1024)
 250
 251 /* Directory in which any temporary files are to be created. */
 252 static char *temp_dir;
 253
 254 /* Flag to reverse the order of all comparisons. */
 255 static int reverse;
 256
 257 /* Flag for stable sort.  This turns off the last ditch bytewise
 258    comparison of lines, and instead leaves lines in the same order
 259    they were read if all keys compare equal.  */
 260 static int stable;
 261
 262 /* Tab character separating fields.  If NUL, then fields are separated
 263    by the empty string between a non-whitespace character and a whitespace
 264    character. */
 265 static char tab;
 266
 267 /* Flag to remove consecutive duplicate lines from the output.
 268    Only the last of a sequence of equal lines will be output. */
 269 static int unique;
 270
 271 /* Nonzero if any of the input files are the standard input. */
 272 static int have_read_stdin;
 273
 274 /* Lists of key field comparisons to be tried. */
 275 static struct keyfield keyhead;
 276
 277 void
 278 usage (int status)
 279 {
 280   if (status != 0)
 281     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 282              program_name);
 283   else
 284     {
 285       printf (_("\
 286 Usage: %s [OPTION]... [FILE]...\n\
 287 "),
 288               program_name);
 289       printf (_("\
 290 Write sorted concatenation of all FILE(s) to standard output.\n\
 291 \n\
 292   +POS1 [-POS2]    start a key at POS1, end it before POS2\n\
 293   -b               ignore leading blanks in sort fields or keys\n\
 294   -c               check if given files already sorted, do not sort\n\
 295   -d               consider only [a-zA-Z0-9 ] characters in keys\n\
 296   -f               fold lower case to upper case characters in keys\n\
 297   -g               compare according to general numerical value, imply -b\n\
 298   -i               consider only [\\040-\\0176] characters in keys\n\
 299   -k POS1[,POS2]   same as +POS1 [-POS2], but all positions counted from 1\n\
 300   -m               merge already sorted files, do not sort\n\
 301   -M               compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
 302   -n               compare according to string numerical value, imply -b\n\
 303   -o FILE          write result on FILE instead of standard output\n\
 304   -r               reverse the result of comparisons\n\
 305   -s               stabilize sort by disabling last resort comparison\n\
 306   -t SEP           use SEParator instead of non- to whitespace transition\n\
 307   -T DIRECTORY     use DIRECTORY for temporary files, not $TMPDIR or %s\n\
 308   -u               with -c, check for strict ordering;\n\
 309                    with -m, only output the first of an equal sequence\n\
 310   -z               end lines with 0 byte, not newline, for find -print0\n\
 311       --help       display this help and exit\n\
 312       --version    output version information and exit\n\
 313 \n\
 314 POS is F[.C][OPTS], where F is the field number and C the character\n\
 315 position in the field, both counted from zero.  OPTS is made up of one\n\
 316 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
 317 for that key.  If no key given, use the entire line as key.  With no\n\
 318 FILE, or when FILE is -, read standard input.\n\
 319 ")
 320               , DEFAULT_TMPDIR);
 321       puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
 322     }
 323   /* Don't use EXIT_FAILURE here in case it is defined to be 1.
 324      POSIX requires that sort return 1 IFF invoked with -c and
 325      the input is not properly sorted.  */
 326   assert (status == 0 || status == SORT_FAILURE);
 327   exit (status);
 328 }
 329
 330 /* The list of temporary files. */
 331 static struct tempnode
 332 {
 333   char *name;
 334   struct tempnode *next;
 335 } temphead;
 336
 337 /* Clean up any remaining temporary files. */
 338
 339 static void
 340 cleanup (void)
 341 {
 342   struct tempnode *node;
 343
 344   for (node = temphead.next; node; node = node->next)
 345     unlink (node->name);
 346 }
 347
 348 static FILE *
 349 xtmpfopen (const char *file)
 350 {
 351   FILE *fp;
 352   int fd;
 353
 354   /*  Open temporary file exclusively, to foil a common
 355       denial-of-service attack.  */
 356   fd = open (file, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0600);
 357   if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
 358     {
 359       error (0, errno, "%s", file);
 360       cleanup ();
 361       exit (SORT_FAILURE);
 362     }
 363
 364   return fp;
 365 }
 366
 367 static FILE *
 368 xfopen (const char *file, const char *how)
 369 {
 370   FILE *fp;
 371
 372   if (STREQ (file, "-"))
 373     {
 374       fp = stdin;
 375     }
 376   else
 377     {
 378       if ((fp = fopen (file, how)) == NULL)
 379         {
 380           error (0, errno, "%s", file);
 381           cleanup ();
 382           exit (SORT_FAILURE);
 383         }
 384     }
 385
 386   if (fp == stdin)
 387     have_read_stdin = 1;
 388   return fp;
 389 }
 390
 391 static void
 392 xfclose (FILE *fp)
 393 {
 394   if (fp == stdin)
 395     {
 396       /* Allow reading stdin from tty more than once. */
 397       if (feof (fp))
 398         clearerr (fp);
 399     }
 400   else if (fp == stdout)
 401     {
 402       if (fflush (fp) != 0)
 403         {
 404           error (0, errno, _("flushing file"));
 405           cleanup ();
 406           exit (SORT_FAILURE);
 407         }
 408     }
 409   else
 410     {
 411       if (fclose (fp) != 0)
 412         {
 413           error (0, errno, _("error closing file"));
 414           cleanup ();
 415           exit (SORT_FAILURE);
 416         }
 417     }
 418 }
 419
 420 static void
 421 write_bytes (const char *buf, size_t n_bytes, FILE *fp)
 422 {
 423   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
 424     {
 425       error (0, errno, _("write error"));
 426       cleanup ();
 427       exit (SORT_FAILURE);
 428     }
 429 }
 430
 431 /* Return a name for a temporary file. */
 432
 433 static char *
 434 tempname (void)
 435 {
 436   static unsigned int seq;
 437   int len = strlen (temp_dir);
 438   char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
 439   int long_file_names = NAME_MAX_IN_DIR (temp_dir) > 12;
 440   struct tempnode *node;
 441
 442   /* If long filenames aren't supported, we cannot use filenames
 443      longer than 8+3 and still assume they are unique.  */
 444   if (long_file_names)
 445     sprintf (name,
 446              "%s%ssort%5.5d%5.5d",
 447              temp_dir,
 448              (len && temp_dir[len - 1] != '/') ? "/" : "",
 449              (unsigned int) getpid () & 0xffff, seq);
 450   else
 451     sprintf (name, "%s%ss%5.5d%2.2d.%3.3d",
 452              temp_dir,
 453              (len && temp_dir[len - 1] != '/') ? "/" : "",
 454              (unsigned int) getpid () & 0xffff, seq / 1000, seq % 1000);
 455
 456   ++seq;
 457
 458   /* Make sure that SEQ's value fits in 5 digits if temp_dir is on
 459      an 8.3 filesystem.  */
 460   if (!long_file_names && seq >= 100000)
 461     seq = 0;
 462
 463   node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
 464   node->name = name;
 465   node->next = temphead.next;
 466   temphead.next = node;
 467   return name;
 468 }
 469
 470 /* Search through the list of temporary files for NAME;
 471    remove it if it is found on the list. */
 472
 473 static void
 474 zaptemp (const char *name)
 475 {
 476   struct tempnode *node, *temp;
 477
 478   for (node = &temphead; node->next; node = node->next)
 479     if (STREQ (name, node->next->name))
 480       break;
 481   if (node->next)
 482     {
 483       temp = node->next;
 484       unlink (temp->name);
 485       free (temp->name);
 486       node->next = temp->next;
 487       free ((char *) temp);
 488     }
 489 }
 490
 491 #ifdef ENABLE_NLS
 492 /* Initialize the character class tables. */
 493
 494 static int
 495 nls_sort_month_comp (const void *m1, const void *m2)
 496 {
 497   return strcoll (((const struct month *) m1)->name,
 498                   ((const struct month *) m2)->name);
 499 }
 500
 501 /* Do collation on strings S1 and S2, but for at most L characters.
 502    we use the fact, that we KNOW that LEN is the min of the two lengths */
 503 static int
 504 strncoll (char *s1, char *s2, int len)
 505 {
 506   register int diff;
 507
 508   if (need_locale)
 509     {
 510       /* Emulate a strncoll function, by forcing strcoll to compare
 511          only the first LEN characters in each string. */
 512       register unsigned char n1 = s1[len];
 513       register unsigned char n2 = s2[len];
 514
 515       s1[len] = s2[len] = 0;
 516       diff = strcoll (s1, s2);
 517       s1[len] = n1;
 518       s2[len] = n2;
 519     }
 520   else
 521     {
 522       diff = memcmp (s1, s2, len);
 523     }
 524
 525   return diff;
 526 }
 527
 528 /* Do collation on strings S1 and S2, but for at most L characters.
 529    Use the fact, that we KNOW that S2 is the shorter string and has
 530    length LEN.  */
 531 static int
 532 strncoll_s2_readonly (char *s1, const char *s2, int len)
 533 {
 534   register int diff;
 535
 536   assert (len == strlen (s2));
 537   assert (len <= strlen (s1));
 538
 539   if (need_locale)
 540     {
 541       /* Emulate a strncoll function, by forcing strcoll to compare
 542          only the first LEN characters in each string. */
 543       register unsigned char n1 = s1[len];
 544
 545       s1[len] = 0;
 546       diff = strcoll (s1, s2);
 547       s1[len] = n1;
 548     }
 549   else
 550     {
 551       diff = memcmp (s1, s2, len);
 552     }
 553
 554   return diff;
 555 }
 556
 557 #endif /* NLS */
 558
 559 static void
 560 inittables (void)
 561 {
 562   int i;
 563
 564   for (i = 0; i < UCHAR_LIM; ++i)
 565     {
 566       if (ISBLANK (i))
 567         blanks[i] = 1;
 568       if (!ISPRINT (i))
 569         nonprinting[i] = 1;
 570       if (!ISALNUM (i) && !ISBLANK (i))
 571         nondictionary[i] = 1;
 572       if (ISLOWER (i))
 573         fold_toupper[i] = toupper (i);
 574       else
 575         fold_toupper[i] = i;
 576     }
 577
 578 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
 579   /* If We're not in the "C" locale, read in different names for months. */
 580   if (need_locale)
 581     {
 582       nls_months_collide[0] = 1;        /* if an error, look again       */
 583       for (i = 0; i < NLS_NUM_MONTHS; i++)
 584         {
 585           char *s;
 586           size_t s_len;
 587           int j;
 588
 589           s = (char *) nl_langinfo (ABMON_1 + us_monthtab[i].val - 1);
 590           s_len = strlen (s);
 591           nls_monthtab[i].name = (char *) xmalloc (s_len + 1);
 592           nls_monthtab[i].val = us_monthtab[i].val;
 593
 594           /* Be careful: abreviated month names
 595              may be longer than the usual 3 characters.  */
 596           for (j = 0; j < s_len; j++)
 597             nls_monthtab[i].name[j] = fold_toupper[UCHAR (s[j])];
 598           nls_monthtab[i].name[j] = '\0';
 599
 600           nls_months_collide[nls_monthtab[i].val] = 0;
 601           for (j = 0; j < NLS_NUM_MONTHS; ++j)
 602             {
 603               if (STREQ (nls_monthtab[i].name, us_monthtab[i].name))
 604                 {
 605                   /* There are indeed some month names in English which
 606                      collide with the NLS name.  */
 607                   nls_months_collide[nls_monthtab[i].val] = 1;
 608                   break;
 609                 }
 610             }
 611         }
 612       /* Now quicksort the month table (should be sorted already!).
 613          However, another locale doesn't rule out the possibility
 614          of a different order of month names. */
 615       qsort ((void *) nls_monthtab, NLS_NUM_MONTHS,
 616              sizeof (struct month), nls_sort_month_comp);
 617       monthtab = nls_monthtab;
 618     }
 619 #endif /* NLS */
 620 }
 621
 622 /* Initialize BUF, allocating ALLOC bytes initially. */
 623
 624 static void
 625 initbuf (struct buffer *buf, int alloc)
 626 {
 627   buf->alloc = alloc;
 628   buf->buf = xmalloc (buf->alloc);
 629   buf->used = buf->left = 0;
 630 }
 631
 632 /* Fill BUF reading from FP, moving buf->left bytes from the end
 633    of buf->buf to the beginning first.  If EOF is reached and the
 634    file wasn't terminated by a newline, supply one.  Return a count
 635    of bytes buffered. */
 636
 637 static int
 638 fillbuf (struct buffer *buf, FILE *fp)
 639 {
 640   int cc;
 641
 642   memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
 643   buf->used = buf->left;
 644
 645   while (!feof (fp) && (buf->used == 0
 646                         || !memchr (buf->buf, eolchar, buf->used)))
 647     {
 648       if (buf->used == buf->alloc)
 649         {
 650           buf->alloc *= 2;
 651           buf->buf = xrealloc (buf->buf, buf->alloc);
 652         }
 653       cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
 654       if (ferror (fp))
 655         {
 656           error (0, errno, _("read error"));
 657           cleanup ();
 658           exit (SORT_FAILURE);
 659         }
 660       buf->used += cc;
 661     }
 662
 663   if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
 664     {
 665       if (buf->used == buf->alloc)
 666         {
 667           buf->alloc *= 2;
 668           buf->buf = xrealloc (buf->buf, buf->alloc);
 669         }
 670       buf->buf[buf->used++] = eolchar;
 671     }
 672
 673   return buf->used;
 674 }
 675
 676 /* Initialize LINES, allocating space for ALLOC lines initially.
 677    LIMIT is the maximum possible number of lines to allocate space
 678    for, ever.  */
 679
 680 static void
 681 initlines (struct lines *lines, int alloc, int limit)
 682 {
 683   lines->alloc = alloc;
 684   lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
 685   lines->used = 0;
 686   lines->limit = limit;
 687 }
 688
 689 /* Return a pointer to the first character of the field specified
 690    by KEY in LINE. */
 691
 692 static char *
 693 begfield (const struct line *line, const struct keyfield *key)
 694 {
 695   register char *ptr = line->text, *lim = ptr + line->length;
 696   register int sword = key->sword, schar = key->schar;
 697
 698   if (tab)
 699     while (ptr < lim && sword--)
 700       {
 701         while (ptr < lim && *ptr != tab)
 702           ++ptr;
 703         if (ptr < lim)
 704           ++ptr;
 705       }
 706   else
 707     while (ptr < lim && sword--)
 708       {
 709         while (ptr < lim && blanks[UCHAR (*ptr)])
 710           ++ptr;
 711         while (ptr < lim && !blanks[UCHAR (*ptr)])
 712           ++ptr;
 713       }
 714
 715   if (key->skipsblanks)
 716     while (ptr < lim && blanks[UCHAR (*ptr)])
 717       ++ptr;
 718
 719   if (ptr + schar <= lim)
 720     ptr += schar;
 721   else
 722     ptr = lim;
 723
 724   return ptr;
 725 }
 726
 727 /* Return the limit of (a pointer to the first character after) the field
 728    in LINE specified by KEY. */
 729
 730 static char *
 731 limfield (const struct line *line, const struct keyfield *key)
 732 {
 733   register char *ptr = line->text, *lim = ptr + line->length;
 734   register int eword = key->eword, echar = key->echar;
 735
 736   /* Note: from the POSIX spec:
 737      The leading field separator itself is included in
 738      a field when -t is not used.  FIXME: move this comment up... */
 739
 740   /* Move PTR past EWORD fields or to one past the last byte on LINE,
 741      whichever comes first.  If there are more than EWORD fields, leave
 742      PTR pointing at the beginning of the field having zero-based index,
 743      EWORD.  If a delimiter character was specified (via -t), then that
 744      `beginning' is the first character following the delimiting TAB.
 745      Otherwise, leave PTR pointing at the first `blank' character after
 746      the preceding field.  */
 747   if (tab)
 748     while (ptr < lim && eword--)
 749       {
 750         while (ptr < lim && *ptr != tab)
 751           ++ptr;
 752         if (ptr < lim && (eword || echar > 0))
 753           ++ptr;
 754       }
 755   else
 756     while (ptr < lim && eword--)
 757       {
 758         while (ptr < lim && blanks[UCHAR (*ptr)])
 759           ++ptr;
 760         while (ptr < lim && !blanks[UCHAR (*ptr)])
 761           ++ptr;
 762       }
 763
 764 #ifdef POSIX_UNSPECIFIED
 765   /* The following block of code makes GNU sort incompatible with
 766      standard Unix sort, so it's ifdef'd out for now.
 767      The POSIX spec isn't clear on how to interpret this.
 768      FIXME: request clarification.
 769
 770      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
 771      Date: Thu, 30 May 96 12:20:41 -0400
 772
 773      [...]I believe I've found another bug in `sort'.
 774
 775      $ cat /tmp/sort.in
 776      a b c 2 d
 777      pq rs 1 t
 778      $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
 779      a b c 2 d
 780      pq rs 1 t
 781      $ /bin/sort +0.6 -0.7 </tmp/sort.in
 782      pq rs 1 t
 783      a b c 2 d
 784
 785      Unix sort produced the answer I expected: sort on the single character
 786      in column 6.  GNU sort produced different results, because it disagrees
 787      on the interpretation of the key-end spec "-M.N".  Unix sort reads this
 788      as "skip M fields, then N characters"; but GNU sort wants it to mean
 789      "skip M fields, then either N characters or the rest of the current
 790      field, whichever comes first".  This extra clause applies only to
 791      key-ends, not key-starts.
 792      */
 793
 794   /* Make LIM point to the end of (one byte past) the current field.  */
 795   if (tab)
 796     {
 797       char *newlim;
 798       newlim = memchr (ptr, tab, lim - ptr);
 799       if (newlim)
 800         lim = newlim;
 801     }
 802   else
 803     {
 804       char *newlim;
 805       newlim = ptr;
 806       while (newlim < lim && blanks[UCHAR (*newlim)])
 807         ++newlim;
 808       while (newlim < lim && !blanks[UCHAR (*newlim)])
 809         ++newlim;
 810       lim = newlim;
 811     }
 812 #endif
 813
 814   /* If we're skipping leading blanks, don't start counting characters
 815      until after skipping past any leading blanks.  */
 816   if (key->skipsblanks)
 817     while (ptr < lim && blanks[UCHAR (*ptr)])
 818       ++ptr;
 819
 820   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
 821   if (ptr + echar <= lim)
 822     ptr += echar;
 823   else
 824     ptr = lim;
 825
 826   return ptr;
 827 }
 828
 829 /* FIXME */
 830
 831 void
 832 trim_trailing_blanks (const char *a_start, char **a_end)
 833 {
 834   while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
 835     --(*a_end);
 836 }
 837
 838 /* Find the lines in BUF, storing pointers and lengths in LINES.
 839    Also replace newlines in BUF with NULs. */
 840
 841 static void
 842 findlines (struct buffer *buf, struct lines *lines)
 843 {
 844   register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
 845   struct keyfield *key = keyhead.next;
 846
 847   lines->used = 0;
 848
 849   while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))
 850          && lines->used < lines->limit)
 851     {
 852       /* There are various places in the code that rely on a NUL
 853          being at the end of in-core lines; NULs inside the lines
 854          will not cause trouble, though. */
 855       *ptr = '\0';
 856
 857       if (lines->used == lines->alloc)
 858         {
 859           lines->alloc *= 2;
 860           lines->lines = (struct line *)
 861             xrealloc ((char *) lines->lines,
 862                       lines->alloc * sizeof (struct line));
 863         }
 864
 865       lines->lines[lines->used].text = beg;
 866       lines->lines[lines->used].length = ptr - beg;
 867
 868       /* Precompute the position of the first key for efficiency. */
 869       if (key)
 870         {
 871           if (key->eword >= 0)
 872             lines->lines[lines->used].keylim =
 873               limfield (&lines->lines[lines->used], key);
 874           else
 875             lines->lines[lines->used].keylim = ptr;
 876
 877           if (key->sword >= 0)
 878             lines->lines[lines->used].keybeg =
 879               begfield (&lines->lines[lines->used], key);
 880           else
 881             {
 882               if (key->skipsblanks)
 883                 while (blanks[UCHAR (*beg)])
 884                   ++beg;
 885               lines->lines[lines->used].keybeg = beg;
 886             }
 887           if (key->skipeblanks)
 888             {
 889               trim_trailing_blanks (lines->lines[lines->used].keybeg,
 890                                     &lines->lines[lines->used].keylim);
 891             }
 892         }
 893       else
 894         {
 895           lines->lines[lines->used].keybeg = 0;
 896           lines->lines[lines->used].keylim = 0;
 897         }
 898
 899       ++lines->used;
 900       beg = ptr + 1;
 901     }
 902
 903   buf->left = lim - beg;
 904 }
 905
 906 /* Compare strings A and B containing decimal fractions < 1.  Each string
 907    should begin with a decimal point followed immediately by the digits
 908    of the fraction.  Strings not of this form are considered to be zero. */
 909
 910 /* The goal here, is to take two numbers a and b... compare these
 911    in parallel.  Instead of converting each, and then comparing the
 912    outcome.  Most likely stopping the comparison before the conversion
 913    is complete.  The algorithm used, in the old sort:
 914
 915    Algorithm: fraccompare
 916    Action   : compare two decimal fractions
 917    accepts  : char *a, char *b
 918    returns  : -1 if a<b, 0 if a=b, 1 if a>b.
 919    implement:
 920
 921    if *a == decimal_point AND *b == decimal_point
 922      find first character different in a and b.
 923      if both are digits, return the difference *a - *b.
 924      if *a is a digit
 925        skip past zeroes
 926        if digit return 1, else 0
 927      if *b is a digit
 928        skip past zeroes
 929        if digit return -1, else 0
 930    if *a is a decimal_point
 931      skip past decimal_point and zeroes
 932      if digit return 1, else 0
 933    if *b is a decimal_point
 934      skip past decimal_point and zeroes
 935      if digit return -1, else 0
 936    return 0
 937
 938    The above implementation duplicates code, and thus there is room
 939    for improvement:
 940       the difference in code of a and b, is solved by using a
 941       reference to s, assigned to either a or b. and using diff
 942       to denote return value.
 943       the difference in either that start being a digit or
 944       the decimal point, is solved by testing if either is
 945       a decimal point, or if the other is a digit...
 946
 947    if *a or *b is a decimal_point
 948       skip all chars where *a == *b
 949       if *a and *b are digits
 950         return *a - *b
 951       if *a is a digit or *a is a decimal_point
 952         s is a
 953         diff is 1
 954       else
 955         s is b
 956         diff is -1
 957       skip decimal_point in s
 958       skip zeroes in s
 959       if *s is a digit
 960         return diff
 961     return 0 */
 962
 963 #define USE_NEW_FRAC_COMPARE
 964 #ifdef USE_NEW_FRAC_COMPARE
 965
 966 static int
 967 fraccompare (register const char *a, register const char *b)
 968 {
 969 # ifdef ENABLE_NLS
 970   nls_fraction_found = 1;
 971 # endif
 972
 973   if (*a == decimal_point || *b == decimal_point)
 974     {
 975       register const char *s;
 976       int diff;
 977
 978       while (*a == *b)
 979         {
 980           ++a;
 981           ++b;
 982           if (!ISDIGIT (*a))
 983             break;
 984         }
 985
 986       if (ISDIGIT (*a) && ISDIGIT (*b))
 987         return (*a) - (*b);
 988
 989       if (*a == decimal_point || (ISDIGIT (*a) && *b != decimal_point))
 990         {
 991           s = a;
 992           diff = 1;
 993         }
 994       else
 995         {
 996           s = b;
 997           diff = -1;
 998         }
 999       if (*s == decimal_point)
1000         ++s;
1001       while (*s == NUMERIC_ZERO)
1002         ++s;
1003       if (ISDIGIT (*s))
1004         return diff;
1005     }
1006   return 0;
1007 }
1008
1009 #else
1010 static int
1011 fraccompare (register const char *a, register const char *b)
1012 {
1013   register int tmpa = *a;
1014   register int tmpb = *b;
1015
1016   if (tmpa == decimal_point && tmpb == decimal_point)
1017     {
1018       do
1019         tmpa = *++a, tmpb = *++b;
1020       while (tmpa == tmpb && ISDIGIT (tmpa));
1021       if (ISDIGIT (tmpa) && ISDIGIT (tmpb))
1022         return tmpa - tmpb;
1023       if (ISDIGIT (tmpa))
1024         {
1025           while (tmpa == NUMERIC_ZERO)
1026             tmpa = *++a;
1027           if (ISDIGIT (tmpa))
1028             return 1;
1029           return 0;
1030         }
1031       if (ISDIGIT (tmpb))
1032         {
1033           while (tmpb == NUMERIC_ZERO)
1034             tmpb = *++b;
1035           if (ISDIGIT (tmpb))
1036             return -1;
1037           return 0;
1038         }
1039       return 0;
1040     }
1041   else if (tmpa == decimal_point)
1042     {
1043       do
1044         tmpa = *++a;
1045       while (tmpa == NUMERIC_ZERO);
1046       if (ISDIGIT (tmpa))
1047         return 1;
1048       return 0;
1049     }
1050   else if (tmpb == decimal_point)
1051     {
1052       do
1053         tmpb = *++b;
1054       while (tmpb == NUMERIC_ZERO);
1055       if (ISDIGIT (tmpb))
1056         return -1;
1057       return 0;
1058     }
1059   return 0;
1060 }
1061 #endif
1062
1063 /* Compare strings A and B as numbers without explicitly converting them to
1064    machine numbers.  Comparatively slow for short strings, but asymptotically
1065    hideously fast. */
1066
1067 /* The code here, is like the above... continuous reoccurrance of the
1068    same code... improved 15-JAN-1997 in connection with native languages
1069    support */
1070
1071 #ifdef ENABLE_NLS
1072
1073 /* Decide the kind of fraction the program will use */
1074 static void
1075 nls_set_fraction (register unsigned char ch)
1076 {
1077   if (!nls_fraction_found && ch != decimal_point)
1078     {
1079       if (ch == FLOATING_POINT)
1080         {                               /* US style */
1081           decimal_point = FLOATING_POINT;
1082           th_sep = FLOATING_COMMA;
1083         }
1084       else if (ch == FLOATING_COMMA)
1085         {                               /* EU style */
1086           decimal_point = FLOATING_COMMA;
1087           th_sep = FLOATING_POINT;
1088         }
1089       else if (ch != decimal_point)
1090         {                               /* Alien    */
1091           decimal_point = ch;
1092           th_sep = '\0';
1093         }
1094     }
1095   nls_fraction_found = 1;
1096 }
1097
1098 /* Look for a fraction
1099    It isn't as simple as it looks... however, consider a number:
1100       1.234,00
1101       1,234.00
1102    It's easy to tell which is a decimal point, and which isn't.  We use
1103    the grouping information to find out how many digits are grouped together
1104    for thousand separator.
1105
1106    The idea here, is to use the grouping information... but not to
1107    spend time with verifying the groups... not too much time, anyway.
1108    so, a number represented to us as:
1109       1.234.567,89
1110    will be taken and separated into different groups, separated by a
1111    separator character (Decimal point or thousands separator).
1112       {1,234,567}
1113    these are the groups of digits that lead to a separator character,
1114    and with the trailing group is added:
1115       {1,234,567,89}
1116    resulting in 4 groups of numbers.  If the resulting number of groups,
1117    are none, or just 1... this is not enough to decide anything about
1118    the decimal point.  We need at least two for that.  With two groups
1119    we have at least one separator.  That separator can be a decimal
1120    point, or a thousands separator... if it is a thousands separator
1121    the number of digits in the last group, will comply with the first
1122    rule in the grouping rule for numeric values. i.e.
1123       |{89}| = grouping[0]
1124    if so, and there are only two groups of numbers, the value cannot
1125    be determined.  If there are three or more numbers, the separator
1126    separating the groups is checked.  If these are the same, the
1127    character is determined to be a thousands separator.  If they are
1128    not the same, the last separator is determined to be a decimal
1129    point.  If checking the grouping rules, we find out that there
1130    are no grouping rules defined, either the grouping rules is NULL
1131    or the first grouping number is 0, then the locale format is used.
1132
1133    We try to take an advantage of a special situation.  If the trailing
1134    group, the one that normally should be the fractional part, turns
1135    out to have the same length as the thousands separator rule says,
1136    making a doubt on that it may be a decimal point, we look for the
1137    group before that, i.e. with a two group form:
1138      {1234,567}
1139    where the grouping rule is 3;3... we take a look at group 1, and find
1140    out that |{1234}| > larger of the two first grouping rules, then
1141    the separator has to be a decimal point...
1142    */
1143
1144 static void
1145 look_for_fraction (const char *s, const char *e)
1146 {
1147   register const char *p;
1148   register unsigned short n = 0;
1149   static unsigned short max_groups = 0;
1150   static unsigned short *groups = NULL;
1151
1152   if (groups == NULL)
1153     {
1154       max_groups = NLS_MAX_GROUPS;
1155       groups = (unsigned short *) xmalloc (sizeof (*groups) * max_groups);
1156     }
1157
1158   /* skip blanks and signs */
1159   while (blanks[UCHAR (*s)] || *s == NEGATION_SIGN)
1160     s++;
1161   /* groups = {}, n = 0 */
1162   for (p = s; p < e; p++)
1163     {
1164       /* groups[n]={number of digits leading to separator n}
1165          n = number of separators so far */
1166       if (*p == decimal_point || *p == th_sep || *p == FLOATING_POINT)
1167         {
1168           if (++n >= max_groups)
1169             {
1170               /* BIG Number... enlarge table */
1171               max_groups += NLS_MAX_GROUPS;
1172               groups = (unsigned short *) xrealloc ((char *) groups,
1173                                                     (sizeof (*groups)
1174                                                      * max_groups));
1175             }
1176           groups[n] = (unsigned short) (p - s);
1177           s = p + 1;
1178         }
1179       else if (!ISDIGIT (*p))
1180         break;
1181       /* mem[s..p]=digits only */
1182     }
1183   /* n = number of separators in s..e */
1184   groups[++n] = (short) (p - s);
1185   /* n = groups in the number */
1186   if (n <= 1)
1187     return;                     /* Only one group of numbers... not enough */
1188   p = nls_grouping;
1189   /* p = address of group rules
1190      s = address of next character after separator */
1191   s = s - 1;                    /* s = address of last separator */
1192   if (p && *p)
1193     {
1194       /* a legal trailing group, iff groups[n] == first rule */
1195       if (groups[n] != (short) *p)
1196         nls_set_fraction (*s);
1197       else
1198         {
1199           if (n == 2)
1200             {                   /* Only two groups */
1201               if (groups[n - 1] > max (p[0], p[1]))
1202                 nls_set_fraction (*s);
1203               return;
1204             }
1205           /* if the separators are the same, it's a thousands */
1206           if (*s != *(s - groups[n]))
1207             nls_set_fraction (*s);
1208           /* s[0] = thousands separator */
1209           else if (*s == th_sep)
1210             nls_fraction_found = 1;
1211         }
1212     }
1213   else
1214     {
1215       /* no grouping allowed here, last separator IS decimal point */
1216       nls_set_fraction (*s);
1217     }
1218 }
1219
1220 static int
1221 numcompare (register const char *a, register const char *b)
1222 {
1223   int ret_code = 1;             /* normal return status, see later in code */
1224   int diff = 0;                 /* difference between two digits           */
1225
1226   while (blanks[UCHAR (*a)])
1227     ++a;
1228   while (blanks[UCHAR (*b)])
1229     ++b;
1230
1231   /* next character in a,b is non-blank */
1232   if ((*a == NEGATION_SIGN || *b == NEGATION_SIGN) && *a != *b)
1233     {
1234       /* a < 0, or b < 0, but not both */
1235       if (*a == NEGATION_SIGN)
1236         ret_code = -1, ++a;     /* a looks < b */
1237       else if (*b == NEGATION_SIGN)
1238         ret_code = 1, ++b;      /* b looks < a */
1239       /* bypass zeroes, decimal points, and thousand sep in a & b */
1240       while (*a == NUMERIC_ZERO || (th_sep && *a == th_sep)
1241              || *a == decimal_point)
1242         ++a;
1243
1244       while (*b == NUMERIC_ZERO || (th_sep && *b == th_sep)
1245              || *b == decimal_point)
1246         ++b;
1247
1248       if (ISDIGIT (*a) || ISDIGIT (*b))
1249         /* here, either a or b or both are digits
1250            if a and b are digits, the signed one is the lesser.
1251            if a is a digit, and not b.. it means b==0, and if b==0
1252            than either is signed if b is signed then -0 < a
1253            or if a is signed then -a < 0.  The ret_code is already set
1254            to mark that the signed number is the lesser, so we just
1255            return that number here.                                    */
1256         return ret_code;
1257
1258       /* *a and *b are neither digits, they are equal -0 == +0 */
1259       return 0;
1260     }
1261   else
1262     {
1263       /* either both numbers are signed, or both are not-signed */
1264       if (*a == NEGATION_SIGN)
1265         {
1266           ++a;
1267           ++b;
1268           ret_code = -1;
1269         }
1270       /* if both are signed, then remember -100 < -10 (ret_code reversed!) */
1271
1272       /* Skip any leading zeroes */
1273       while (*a == NUMERIC_ZERO)
1274         ++a;
1275       while (*b == NUMERIC_ZERO)
1276         ++b;
1277
1278     continue_thousands:
1279
1280       /* skip all equal digits */
1281       while (ISDIGIT (*a) && ISDIGIT (*b) && *a == *b)
1282         a++, b++;
1283
1284       /* Here, we have either different digits, or possible fractions
1285          or thousand separators. */
1286
1287       if (ISDIGIT (*a) && ISDIGIT (*b))
1288         {
1289           if (diff == 0)
1290             diff = ((*a) - (*b));       /* simple, isn't it? not quite */
1291           a++, b++;
1292           goto continue_thousands;
1293         }
1294
1295       /* now, here either may be a fraction, or a thousand separator...
1296          or both.                                                        */
1297       /* We've decided what are decimal_points, and what are thousands sep */
1298       if ((th_sep != 0) && (*a == th_sep || *b == th_sep))
1299         {
1300           if (*a == th_sep)
1301             ++a;
1302           if (*b == th_sep)
1303             ++b;
1304           goto continue_thousands;      /* Ugly, but better than a while(1) */
1305         }
1306
1307       if (ISDIGIT (*a))
1308         return ret_code;        /* a has more digits than b */
1309       if (ISDIGIT (*b))
1310         return ret_code * -1;   /* b has more digits than a */
1311
1312       /* now, we should have the fractions solved */
1313       if ((diff == 0) && (*a == decimal_point || *b == decimal_point))
1314         return ret_code * fraccompare (a, b);
1315
1316       return diff;              /* fall through here, and diff decides */
1317     }
1318 }
1319 #else
1320 static int
1321 numcompare (register const char *a, register const char *b)
1322 {
1323   register int tmpa, tmpb, loga, logb, tmp;
1324
1325   tmpa = UCHAR (*a);
1326   tmpb = UCHAR (*b);
1327
1328   while (blanks[tmpa])
1329     tmpa = UCHAR (*++a);
1330   while (blanks[tmpb])
1331     tmpb = UCHAR (*++b);
1332
1333   if (tmpa == NEGATION_SIGN)
1334     {
1335       do
1336         tmpa = UCHAR (*++a);
1337       while (tmpa == NUMERIC_ZERO);
1338       if (tmpb != NEGATION_SIGN)
1339         {
1340           if (tmpa == decimal_point)
1341             do
1342               tmpa = *++a;
1343             while (tmpa == NUMERIC_ZERO);
1344           if (ISDIGIT (tmpa))
1345             return -1;
1346           while (tmpb == NUMERIC_ZERO)
1347             tmpb = UCHAR (*++b);
1348           if (tmpb == decimal_point)
1349             do
1350               tmpb = *++b;
1351             while (tmpb == NUMERIC_ZERO);
1352           if (ISDIGIT (tmpb))
1353             return -1;
1354           return 0;
1355         }
1356       do
1357         tmpb = UCHAR (*++b);
1358       while (tmpb == NUMERIC_ZERO);
1359
1360       while (tmpa == tmpb && ISDIGIT (tmpa))
1361         tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
1362
1363       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1364           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1365         return -fraccompare (a, b);
1366
1367       if (ISDIGIT (tmpa))
1368         for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga)
1369           ;
1370       else
1371         loga = 0;
1372
1373       if (ISDIGIT (tmpb))
1374         for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb)
1375           ;
1376       else
1377         logb = 0;
1378
1379       if ((tmp = logb - loga) != 0)
1380         return tmp;
1381
1382       if (!loga)
1383         return 0;
1384
1385       return tmpb - tmpa;
1386     }
1387   else if (tmpb == NEGATION_SIGN)
1388     {
1389       do
1390         tmpb = UCHAR (*++b);
1391       while (tmpb == NUMERIC_ZERO);
1392       if (tmpb == decimal_point)
1393         do
1394           tmpb = *++b;
1395         while (tmpb == NUMERIC_ZERO);
1396       if (ISDIGIT (tmpb))
1397         return 1;
1398       while (tmpa == NUMERIC_ZERO)
1399         tmpa = UCHAR (*++a);
1400       if (tmpa == decimal_point)
1401         do
1402           tmpa = UCHAR (*++a);
1403         while (tmpa == NUMERIC_ZERO);
1404       if (ISDIGIT (tmpa))
1405         return 1;
1406       return 0;
1407     }
1408   else
1409     {
1410       while (tmpa == NUMERIC_ZERO)
1411         tmpa = UCHAR (*++a);
1412       while (tmpb == NUMERIC_ZERO)
1413         tmpb = UCHAR (*++b);
1414
1415       while (tmpa == tmpb && ISDIGIT (tmpa))
1416         tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
1417
1418       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1419           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1420         return fraccompare (a, b);
1421
1422       if (ISDIGIT (tmpa))
1423         for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga)
1424           ;
1425       else
1426         loga = 0;
1427
1428       if (ISDIGIT (tmpb))
1429         for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb)
1430           ;
1431       else
1432         logb = 0;
1433
1434       if ((tmp = loga - logb) != 0)
1435         return tmp;
1436
1437       if (!loga)
1438         return 0;
1439
1440       return tmpa - tmpb;
1441     }
1442 }
1443 #endif
1444
1445 static int
1446 general_numcompare (const char *sa, const char *sb)
1447 {
1448   double a, b;
1449   /* FIXME: add option to warn about failed conversions.  */
1450   /* FIXME: maybe add option to try expensive FP conversion
1451      only if A and B can't be compared more cheaply/accurately.  */
1452   if (xstrtod (sa, NULL, &a))
1453     {
1454       a = 0;
1455     }
1456   if (xstrtod (sb, NULL, &b))
1457     {
1458       b = 0;
1459     }
1460   return a == b ? 0 : a < b ? -1 : 1;
1461 }
1462
1463 /* Return an integer in 1..12 of the month name S with length LEN.
1464    Return 0 if the name in S is not recognized.  */
1465
1466 static int
1467 getmonth (const char *s, int len)
1468 {
1469   char *month;
1470   register int i, lo = 0, hi = 12, result;
1471
1472   while (len > 0 && blanks[UCHAR (*s)])
1473     {
1474       ++s;
1475       --len;
1476     }
1477
1478   if (len == 0)
1479     return 0;
1480
1481   month = (char *) alloca (len + 1);
1482   for (i = 0; i < len; ++i)
1483     month[i] = fold_toupper[UCHAR (s[i])];
1484   while (blanks[UCHAR (month[i - 1])])
1485     --i;
1486   month[i] = '\0';
1487
1488   do
1489     {
1490       int ix = (lo + hi) / 2;
1491
1492       len = strlen (monthtab[ix].name);
1493       if (NLS_STRNCMP (month, monthtab[ix].name, len) < 0)
1494         hi = ix;
1495       else
1496         lo = ix;
1497     }
1498   while (hi - lo > 1);
1499
1500   result = (!strncmp (month, monthtab[lo].name, len) ? monthtab[lo].val : 0);
1501
1502   return result;
1503 }
1504
1505 #ifdef ENABLE_NLS
1506 /* Look for the month in locale table, and if that fails try with
1507    us month name table                                              */
1508 static int
1509 nls_month_is_either_locale (const char *s, int len)
1510 {
1511   int ind;
1512
1513   monthtab = nls_monthtab;
1514   ind = getmonth (s, len);
1515   if (ind == 0)
1516     {
1517       monthtab = us_monthtab;
1518       ind = getmonth (s, len);
1519     }
1520   return ind;
1521 }
1522 #endif
1523
1524 /* Compare two lines A and B trying every key in sequence until there
1525    are no more keys or a difference is found. */
1526
1527 static int
1528 keycompare (const struct line *a, const struct line *b)
1529 {
1530   register char *texta, *textb, *lima, *limb;
1531   register unsigned char *translate;
1532   register int *ignore;
1533   struct keyfield *key;
1534   int diff = 0, iter = 0, lena, lenb;
1535
1536   for (key = keyhead.next; key; key = key->next, ++iter)
1537     {
1538       int comparable_lengths = 1;
1539
1540       ignore = key->ignore;
1541       translate = (unsigned char *) key->translate;
1542
1543       /* Find the beginning and limit of each field. */
1544       if (iter || a->keybeg == NULL || b->keybeg == NULL)
1545         {
1546           if (key->eword >= 0)
1547             lima = limfield (a, key), limb = limfield (b, key);
1548           else
1549             lima = a->text + a->length, limb = b->text + b->length;
1550
1551           if (key->sword >= 0)
1552             texta = begfield (a, key), textb = begfield (b, key);
1553           else
1554             {
1555               texta = a->text, textb = b->text;
1556               if (key->skipsblanks)
1557                 {
1558                   while (texta < lima && blanks[UCHAR (*texta)])
1559                     ++texta;
1560                   while (textb < limb && blanks[UCHAR (*textb)])
1561                     ++textb;
1562                 }
1563             }
1564         }
1565       else
1566         {
1567           /* For the first iteration only, the key positions have
1568              been precomputed for us. */
1569           texta = a->keybeg, lima = a->keylim;
1570           textb = b->keybeg, limb = b->keylim;
1571         }
1572
1573       /* Find the lengths. */
1574       lena = lima - texta, lenb = limb - textb;
1575       if (lena < 0)
1576         lena = 0;
1577       if (lenb < 0)
1578         lenb = 0;
1579
1580       if (key->skipeblanks)
1581         {
1582           char *a_end = texta + lena;
1583           char *b_end = textb + lenb;
1584           trim_trailing_blanks (texta, &a_end);
1585           trim_trailing_blanks (textb, &b_end);
1586           lena = a_end - texta;
1587           lenb = b_end - textb;
1588         }
1589
1590       /* Actually compare the fields. */
1591       if (key->numeric)
1592         {
1593           if (*lima || *limb)
1594             {
1595               char savea = *lima, saveb = *limb;
1596
1597               *lima = *limb = '\0';
1598               diff = numcompare (texta, textb);
1599               *lima = savea, *limb = saveb;
1600             }
1601           else
1602             diff = numcompare (texta, textb);
1603
1604           if (diff)
1605             return key->reverse ? -diff : diff;
1606           continue;
1607         }
1608       else if (key->general_numeric)
1609         {
1610           if (*lima || *limb)
1611             {
1612               char savea = *lima, saveb = *limb;
1613
1614               *lima = *limb = '\0';
1615               diff = general_numcompare (texta, textb);
1616               *lima = savea, *limb = saveb;
1617             }
1618           else
1619             diff = general_numcompare (texta, textb);
1620
1621           if (diff)
1622             return key->reverse ? -diff : diff;
1623           continue;
1624         }
1625       else if (key->month)
1626         {
1627 #ifdef ENABLE_NLS
1628
1629           /* if we haven't decided which locale to go with, we get the
1630              month name from either.  If either month name is fully
1631              solved and the month name doesn't collide with the other
1632              locale... then use that table from there forward */
1633           if (!nls_month_found)
1634             {
1635               int x;
1636
1637               x = nls_month_is_either_locale (texta, lena);
1638               nls_month_found = !nls_months_collide[x];
1639               if (nls_month_found)
1640                 {
1641                   diff = x - getmonth (textb, lenb);
1642                 }
1643               else
1644                 {
1645                   diff = nls_month_is_either_locale (textb, lenb);
1646                   nls_month_found = !nls_months_collide[diff];
1647                   diff = x - diff;
1648                 }
1649             }
1650           else
1651 #endif
1652             diff = getmonth (texta, lena) - getmonth (textb, lenb);
1653           if (diff)
1654             return key->reverse ? -diff : diff;
1655           continue;
1656         }
1657 #ifdef ENABLE_NLS
1658
1659       /* Sorting like this may become slow, so in a simple locale the user
1660          can select a faster sort that is similar to ascii sort  */
1661       else if (need_locale)
1662         {
1663           /* FIXME: consider making parameters non-const, then when
1664              both ignore and translate are NULL (which should be most
1665              of the time) we could temporarily NUL-terminate them in
1666              place and avoid the copy.  */
1667
1668           char *copy_a = (char *) alloca (lena + 1);
1669           char *copy_b = (char *) alloca (lenb + 1);
1670           int new_len_a, new_len_b, i;
1671
1672           /* We can't use strcoll directly on the two strings,
1673              but rather must extract the text for the key
1674              (to NUL-terminate for strcoll) and handle any
1675              'ignore' and/or 'translate' before comparing.   */
1676           for (new_len_a = new_len_b = i = 0; i < max (lena, lenb); i++)
1677             {
1678               if (i < lena)
1679                 {
1680                   copy_a[new_len_a] = (translate
1681                                        ? translate[UCHAR (texta[i])]
1682                                        : texta[i]);
1683                   if (!ignore || !ignore[UCHAR (texta[i])])
1684                     ++new_len_a;
1685                 }
1686               if (i < lenb)
1687                 {
1688                   copy_b[new_len_b] = (translate
1689                                        ? translate[UCHAR (textb[i])]
1690                                        : textb [i]);
1691                   if (!ignore || !ignore[UCHAR (textb[i])])
1692                     ++new_len_b;
1693                 }
1694             }
1695
1696           copy_a[new_len_a] = copy_b[new_len_b] = 0;
1697
1698           diff = strcoll (copy_a, copy_b);
1699
1700           /* Free copy_a and copy_b.  */
1701           alloca (0);
1702
1703           if (diff)
1704             return key->reverse ? -diff : diff;
1705
1706           continue;
1707         }
1708 #endif
1709       else if (ignore && translate)
1710
1711 #define CMP_WITH_IGNORE(A, B)                                           \
1712   do                                                                    \
1713     {                                                                   \
1714           while (texta < lima && textb < limb)                          \
1715             {                                                           \
1716               while (texta < lima && ignore[UCHAR (*texta)])            \
1717                 ++texta;                                                \
1718               while (textb < limb && ignore[UCHAR (*textb)])            \
1719                 ++textb;                                                \
1720               if (texta < lima && textb < limb)                         \
1721                 {                                                       \
1722                   if ((A) != (B))                                       \
1723                     {                                                   \
1724                       diff = UCHAR (A) - UCHAR (B);                     \
1725                       break;                                            \
1726                     }                                                   \
1727                   ++texta;                                              \
1728                   ++textb;                                              \
1729                 }                                                       \
1730                                                                         \
1731               if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1732                 diff = -1;                                              \
1733               else if (texta < lima && textb == limb                    \
1734                        && !ignore[UCHAR (*texta)])                      \
1735                 diff = 1;                                               \
1736             }                                                           \
1737                                                                         \
1738           if (diff == 0)                                                \
1739             {                                                           \
1740               while (texta < lima && ignore[UCHAR (*texta)])            \
1741                 ++texta;                                                \
1742               while (textb < limb && ignore[UCHAR (*textb)])            \
1743                 ++textb;                                                \
1744                                                                         \
1745               if (texta == lima && textb < limb)                        \
1746                 diff = -1;                                              \
1747               else if (texta < lima && textb == limb)                   \
1748                 diff = 1;                                               \
1749             }                                                           \
1750           /* Relative lengths are meaningless if characters were ignored.  \
1751              Handling this case here avoids what might be an invalid length  \
1752              comparison below.  */                                      \
1753           if (diff == 0 && texta == lima && textb == limb)              \
1754             comparable_lengths = 0;                                     \
1755     }                                                                   \
1756   while (0)
1757
1758         CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1759       else if (ignore)
1760         CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1761       else if (translate)
1762         while (texta < lima && textb < limb)
1763           {
1764             if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1765               {
1766                 diff = (UCHAR (translate[UCHAR (*--texta)])
1767                         - UCHAR (translate[UCHAR (*--textb)]));
1768                 break;
1769               }
1770           }
1771       else
1772         {
1773           diff = NLS_MEMCMP (texta, textb, min (lena, lenb));
1774         }
1775
1776       if (diff)
1777         return key->reverse ? -diff : diff;
1778       if (comparable_lengths && (diff = lena - lenb) != 0)
1779         return key->reverse ? -diff : diff;
1780     }
1781
1782   return 0;
1783 }
1784
1785 /* Compare two lines A and B, returning negative, zero, or positive
1786    depending on whether A compares less than, equal to, or greater than B. */
1787
1788 static int
1789 compare (register const struct line *a, register const struct line *b)
1790 {
1791   int diff, tmpa, tmpb, mini;
1792
1793   /* First try to compare on the specified keys (if any).
1794      The only two cases with no key at all are unadorned sort,
1795      and unadorned sort -r. */
1796   if (keyhead.next)
1797     {
1798       diff = keycompare (a, b);
1799       if (diff != 0)
1800         return diff;
1801       if (unique || stable)
1802         return 0;
1803     }
1804
1805   /* If the keys all compare equal (or no keys were specified)
1806      fall through to the default byte-by-byte comparison. */
1807   tmpa = a->length, tmpb = b->length;
1808   mini = min (tmpa, tmpb);
1809   if (mini == 0)
1810     diff = tmpa - tmpb;
1811   else
1812     {
1813       char *ap = a->text, *bp = b->text;
1814
1815 #ifdef ENABLE_NLS
1816       if (need_locale)  /* want absolutely correct sorting */
1817         {
1818           diff = strcoll (ap, bp);
1819           return reverse ? -diff : diff;
1820         }
1821 #endif
1822       diff = UCHAR (*ap) - UCHAR (*bp);
1823       if (diff == 0)
1824         {
1825           diff = NLS_MEMCMP (ap, bp, mini);
1826           if (diff == 0)
1827             diff = tmpa - tmpb;
1828         }
1829     }
1830
1831   return reverse ? -diff : diff;
1832 }
1833
1834 /* Check that the lines read from the given FP come in order.  Print a
1835    diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1836    the line number of the first out-of-order line (counting from 1) if they
1837    are not in order.  Otherwise, print no diagnostic and return zero.  */
1838
1839 static int
1840 checkfp (FILE *fp, const char *file_name)
1841 {
1842   struct buffer buf;            /* Input buffer. */
1843   struct lines lines;           /* Lines scanned from the buffer. */
1844   struct line temp;             /* Copy of previous line. */
1845   int cc;                       /* Character count. */
1846   int alloc;
1847   int line_number = 1;
1848   struct line *disorder_line;
1849   int disorder_line_number = 0;
1850
1851 #ifdef lint  /* Suppress `used before initialized' warning.  */
1852   disorder_line = NULL;
1853 #endif
1854
1855   initbuf (&buf, mergealloc);
1856   initlines (&lines, mergealloc / linelength + 1,
1857              LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1858   alloc = linelength;
1859   temp.text = xmalloc (alloc);
1860
1861   cc = fillbuf (&buf, fp);
1862   if (cc == 0)
1863     goto finish;
1864
1865   findlines (&buf, &lines);
1866
1867   while (1)
1868     {
1869       struct line *prev_line;   /* Pointer to previous line. */
1870       int cmp;                  /* Result of calling compare. */
1871       int i;
1872
1873       /* Compare each line in the buffer with its successor. */
1874       for (i = 0; i < lines.used - 1; ++i)
1875         {
1876           cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1877           if ((unique && cmp >= 0) || (cmp > 0))
1878             {
1879               disorder_line = &lines.lines[i + 1];
1880               disorder_line_number = line_number + i + 1;
1881               goto finish;
1882             }
1883         }
1884
1885       line_number += lines.used;
1886
1887       /* Save the last line of the buffer and refill the buffer. */
1888       prev_line = lines.lines + (lines.used - 1);
1889       if (prev_line->length + 1 > alloc)
1890         {
1891           do
1892             {
1893               alloc *= 2;
1894             }
1895           while (alloc < prev_line->length + 1);
1896           temp.text = xrealloc (temp.text, alloc);
1897         }
1898       assert (prev_line->length + 1 <= alloc);
1899       memcpy (temp.text, prev_line->text, prev_line->length + 1);
1900       temp.length = prev_line->length;
1901       temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1902       temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1903
1904       cc = fillbuf (&buf, fp);
1905       if (cc == 0)
1906         break;
1907
1908       findlines (&buf, &lines);
1909       /* Make sure the line saved from the old buffer contents is
1910          less than or equal to the first line of the new buffer. */
1911       cmp = compare (&temp, &lines.lines[0]);
1912       if ((unique && cmp >= 0) || (cmp > 0))
1913         {
1914           disorder_line = &lines.lines[0];
1915           disorder_line_number = line_number;
1916           break;
1917         }
1918     }
1919
1920 finish:
1921   xfclose (fp);
1922
1923   if (disorder_line_number)
1924     {
1925       fprintf (stderr, _("%s: %s:%d: disorder: "), program_name, file_name,
1926                disorder_line_number);
1927       write_bytes (disorder_line->text, disorder_line->length, stderr);
1928       putc (eolchar, stderr);
1929     }
1930
1931   free (buf.buf);
1932   free ((char *) lines.lines);
1933   free (temp.text);
1934   return disorder_line_number;
1935 }
1936
1937 /* Merge lines from FPS onto OFP.  NFPS cannot be greater than NMERGE.
1938    Close FPS before returning. */
1939
1940 static void
1941 mergefps (FILE **fps, register int nfps, FILE *ofp)
1942 {
1943   struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1944   struct lines lines[NMERGE];   /* Line tables for each buffer. */
1945   struct line saved;            /* Saved line for unique check. */
1946   int savedflag = 0;            /* True if there is a saved line. */
1947   int savealloc;                /* Size allocated for the saved line. */
1948   int cur[NMERGE];              /* Current line in each line table. */
1949   int ord[NMERGE];              /* Table representing a permutation of fps,
1950                                    such that lines[ord[0]].lines[cur[ord[0]]]
1951                                    is the smallest line and will be next
1952                                    output. */
1953   register int i, j, t;
1954
1955 #ifdef lint  /* Suppress `used before initialized' warning.  */
1956   savealloc = 0;
1957 #endif
1958
1959   /* Allocate space for a saved line if necessary. */
1960   if (unique)
1961     {
1962       savealloc = linelength;
1963       saved.text = xmalloc (savealloc);
1964     }
1965
1966   /* Read initial lines from each input file. */
1967   for (i = 0; i < nfps; ++i)
1968     {
1969       initbuf (&buffer[i], mergealloc);
1970       /* If a file is empty, eliminate it from future consideration. */
1971       while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1972         {
1973           xfclose (fps[i]);
1974           --nfps;
1975           for (j = i; j < nfps; ++j)
1976             fps[j] = fps[j + 1];
1977         }
1978       if (i == nfps)
1979         free (buffer[i].buf);
1980       else
1981         {
1982           initlines (&lines[i], mergealloc / linelength + 1,
1983                      LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1984           findlines (&buffer[i], &lines[i]);
1985           cur[i] = 0;
1986         }
1987     }
1988
1989   /* Set up the ord table according to comparisons among input lines.
1990      Since this only reorders two items if one is strictly greater than
1991      the other, it is stable. */
1992   for (i = 0; i < nfps; ++i)
1993     ord[i] = i;
1994   for (i = 1; i < nfps; ++i)
1995     if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1996                  &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1997       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1998
1999   /* Repeatedly output the smallest line until no input remains. */
2000   while (nfps)
2001     {
2002       /* If uniqified output is turned on, output only the first of
2003          an identical series of lines. */
2004       if (unique)
2005         {
2006           if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
2007             {
2008               write_bytes (saved.text, saved.length, ofp);
2009               putc (eolchar, ofp);
2010               savedflag = 0;
2011             }
2012           if (!savedflag)
2013             {
2014               if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
2015                 {
2016                   while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
2017                     savealloc *= 2;
2018                   saved.text = xrealloc (saved.text, savealloc);
2019                 }
2020               saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
2021               memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
2022                       saved.length + 1);
2023               if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
2024                 {
2025                   saved.keybeg = saved.text +
2026                     (lines[ord[0]].lines[cur[ord[0]]].keybeg
2027                      - lines[ord[0]].lines[cur[ord[0]]].text);
2028                 }
2029               if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
2030                 {
2031                   saved.keylim = saved.text +
2032                     (lines[ord[0]].lines[cur[ord[0]]].keylim
2033                      - lines[ord[0]].lines[cur[ord[0]]].text);
2034                 }
2035               savedflag = 1;
2036             }
2037         }
2038       else
2039         {
2040           write_bytes (lines[ord[0]].lines[cur[ord[0]]].text,
2041                        lines[ord[0]].lines[cur[ord[0]]].length, ofp);
2042           putc (eolchar, ofp);
2043         }
2044
2045       /* Check if we need to read more lines into core. */
2046       if (++cur[ord[0]] == lines[ord[0]].used)
2047         {
2048           if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
2049             {
2050               findlines (&buffer[ord[0]], &lines[ord[0]]);
2051               cur[ord[0]] = 0;
2052             }
2053           else
2054             {
2055               /* We reached EOF on fps[ord[0]]. */
2056               for (i = 1; i < nfps; ++i)
2057                 if (ord[i] > ord[0])
2058                   --ord[i];
2059               --nfps;
2060               xfclose (fps[ord[0]]);
2061               free (buffer[ord[0]].buf);
2062               free ((char *) lines[ord[0]].lines);
2063               for (i = ord[0]; i < nfps; ++i)
2064                 {
2065                   fps[i] = fps[i + 1];
2066                   buffer[i] = buffer[i + 1];
2067                   lines[i] = lines[i + 1];
2068                   cur[i] = cur[i + 1];
2069                 }
2070               for (i = 0; i < nfps; ++i)
2071                 ord[i] = ord[i + 1];
2072               continue;
2073             }
2074         }
2075
2076       /* The new line just read in may be larger than other lines
2077          already in core; push it back in the queue until we encounter
2078          a line larger than it. */
2079       for (i = 1; i < nfps; ++i)
2080         {
2081           t = compare (&lines[ord[0]].lines[cur[ord[0]]],
2082                        &lines[ord[i]].lines[cur[ord[i]]]);
2083           if (!t)
2084             t = ord[0] - ord[i];
2085           if (t < 0)
2086             break;
2087         }
2088       t = ord[0];
2089       for (j = 1; j < i; ++j)
2090         ord[j - 1] = ord[j];
2091       ord[i - 1] = t;
2092     }
2093
2094   if (unique && savedflag)
2095     {
2096       write_bytes (saved.text, saved.length, ofp);
2097       putc (eolchar, ofp);
2098       free (saved.text);
2099     }
2100 }
2101
2102 #ifdef ENABLE_NLS
2103
2104 /* Find the numeric format that this file represents to us for sorting. */
2105 static void
2106 nls_numeric_format (const struct line *line, int nlines)
2107 {
2108   struct nls_keyfield *n_key = nls_keyhead;
2109
2110   /* line = first line, nlines = number of lines,
2111      nls_fraction_found = false                           */
2112   for (; !nls_fraction_found && nlines > 0; line++, nlines--)
2113     {
2114       int iter;
2115       for (iter = 0; !nls_fraction_found; iter++)
2116         {
2117           char *text;
2118           char *lim;
2119           struct keyfield *key = n_key->key;
2120
2121           /* text = {}, lim = {}, key = first key */
2122           if (iter || line->keybeg == NULL)
2123             {
2124               /* Succeding keys, where the key field is
2125                  specified                              */
2126               if (key->eword >= 0) /* key->eword = length of key */
2127                 lim = limfield (line, key);
2128               else
2129                 lim = line->text + line->length;
2130               /* lim = end of key field */
2131
2132               if (key->sword >= 0) /* key->sword = start of key */
2133                 text = begfield (line, key);
2134               else
2135                 text = line->text;
2136               /* text = start of field */
2137             }
2138           else
2139             {
2140               /* First key is always the whole line */
2141               text = line->keybeg;
2142               lim = line->keylim;
2143             }
2144           /* text = start of text to sort
2145              lim  = end of text to sort    */
2146
2147           look_for_fraction (text, lim);
2148
2149           /* nls_fraction_found = decimal_point found? */
2150
2151           if ((n_key = n_key->next) == nls_keyhead)
2152             break;  /* No more keys for this line */
2153         }
2154     }
2155   nls_fraction_found = 1;
2156   /* decide on current decimal_point known */
2157 }
2158
2159 #endif
2160
2161 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
2162
2163 static void
2164 sortlines (struct line *lines, int nlines, struct line *temp)
2165 {
2166   register struct line *lo, *hi, *t;
2167   register int nlo, nhi;
2168
2169   if (nlines == 2)
2170     {
2171       if (compare (&lines[0], &lines[1]) > 0)
2172         {
2173           *temp = lines[0];
2174           lines[0] = lines[1];
2175           lines[1] = *temp;
2176         }
2177       return;
2178     }
2179
2180   nlo = nlines / 2;
2181   lo = lines;
2182   nhi = nlines - nlo;
2183   hi = lines + nlo;
2184
2185   if (nlo > 1)
2186     sortlines (lo, nlo, temp);
2187
2188   if (nhi > 1)
2189     sortlines (hi, nhi, temp);
2190
2191   t = temp;
2192
2193   while (nlo && nhi)
2194     if (compare (lo, hi) <= 0)
2195       *t++ = *lo++, --nlo;
2196     else
2197       *t++ = *hi++, --nhi;
2198   while (nlo--)
2199     *t++ = *lo++;
2200
2201   for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
2202     *lo++ = *t++;
2203 }
2204
2205 /* Check that each of the NFILES FILES is ordered.
2206    Return a count of disordered files. */
2207
2208 static int
2209 check (char **files, int nfiles)
2210 {
2211   int i, disorders = 0;
2212   FILE *fp;
2213
2214   for (i = 0; i < nfiles; ++i)
2215     {
2216       fp = xfopen (files[i], "r");
2217       if (checkfp (fp, files[i]))
2218         {
2219           ++disorders;
2220         }
2221     }
2222   return disorders;
2223 }
2224
2225 /* Merge NFILES FILES onto OFP. */
2226
2227 static void
2228 merge (char **files, int nfiles, FILE *ofp)
2229 {
2230   int i, j, t;
2231   char *temp;
2232   FILE *fps[NMERGE], *tfp;
2233
2234   while (nfiles > NMERGE)
2235     {
2236       t = 0;
2237       for (i = 0; i < nfiles / NMERGE; ++i)
2238         {
2239           for (j = 0; j < NMERGE; ++j)
2240             fps[j] = xfopen (files[i * NMERGE + j], "r");
2241           tfp = xtmpfopen (temp = tempname ());
2242           mergefps (fps, NMERGE, tfp);
2243           xfclose (tfp);
2244           for (j = 0; j < NMERGE; ++j)
2245             zaptemp (files[i * NMERGE + j]);
2246           files[t++] = temp;
2247         }
2248       for (j = 0; j < nfiles % NMERGE; ++j)
2249         fps[j] = xfopen (files[i * NMERGE + j], "r");
2250       tfp = xtmpfopen (temp = tempname ());
2251       mergefps (fps, nfiles % NMERGE, tfp);
2252       xfclose (tfp);
2253       for (j = 0; j < nfiles % NMERGE; ++j)
2254         zaptemp (files[i * NMERGE + j]);
2255       files[t++] = temp;
2256       nfiles = t;
2257     }
2258
2259   for (i = 0; i < nfiles; ++i)
2260     fps[i] = xfopen (files[i], "r");
2261   mergefps (fps, i, ofp);
2262   for (i = 0; i < nfiles; ++i)
2263     zaptemp (files[i]);
2264 }
2265
2266 /* Sort NFILES FILES onto OFP. */
2267
2268 static void
2269 sort (char **files, int nfiles, FILE *ofp)
2270 {
2271   struct buffer buf;
2272   struct lines lines;
2273   struct line *tmp;
2274   int i, ntmp;
2275   FILE *fp, *tfp;
2276   struct tempnode *node;
2277   int n_temp_files = 0;
2278   char **tempfiles;
2279
2280   initbuf (&buf, sortalloc);
2281   initlines (&lines, sortalloc / linelength + 1,
2282              LINEALLOC / sizeof (struct line));
2283   ntmp = lines.alloc;
2284   tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
2285
2286   while (nfiles--)
2287     {
2288       fp = xfopen (*files++, "r");
2289       while (fillbuf (&buf, fp))
2290         {
2291           findlines (&buf, &lines);
2292           if (lines.used > ntmp)
2293             {
2294               while (lines.used > ntmp)
2295                 ntmp *= 2;
2296               tmp = (struct line *)
2297                 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
2298             }
2299 #ifdef ENABLE_NLS
2300           if (nls_keyhead)
2301             nls_keyhead = nls_keyhead->next;
2302           if (!nls_fraction_found && nls_keyhead)
2303             nls_numeric_format (lines.lines, lines.used);
2304 #endif
2305           sortlines (lines.lines, lines.used, tmp);
2306           if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
2307             {
2308               tfp = ofp;
2309             }
2310           else
2311             {
2312               ++n_temp_files;
2313               tfp = xtmpfopen (tempname ());
2314             }
2315           for (i = 0; i < lines.used; ++i)
2316             if (!unique || i == 0
2317                 || compare (&lines.lines[i], &lines.lines[i - 1]))
2318               {
2319                 write_bytes (lines.lines[i].text, lines.lines[i].length, tfp);
2320                 putc (eolchar, tfp);
2321               }
2322           if (tfp != ofp)
2323             xfclose (tfp);
2324         }
2325       xfclose (fp);
2326     }
2327
2328   free (buf.buf);
2329   free ((char *) lines.lines);
2330   free ((char *) tmp);
2331
2332   if (n_temp_files)
2333     {
2334       tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
2335       i = n_temp_files;
2336       for (node = temphead.next; i > 0; node = node->next)
2337         tempfiles[--i] = node->name;
2338       merge (tempfiles, n_temp_files, ofp);
2339       free ((char *) tempfiles);
2340     }
2341 }
2342
2343 /* Insert key KEY at the end of the list (`keyhead'). */
2344
2345 static void
2346 insertkey (struct keyfield *key)
2347 {
2348   struct keyfield *k = &keyhead;
2349
2350   while (k->next)
2351     k = k->next;
2352   k->next = key;
2353   key->next = NULL;
2354 #ifdef ENABLE_NLS
2355   if (key->numeric || key->general_numeric)
2356     {
2357       struct nls_keyfield *nk;
2358
2359       nk = (struct nls_keyfield *) xmalloc (sizeof (struct nls_keyfield));
2360       nk->key = key;
2361       if (nls_keyhead)
2362         {
2363           nk->next = nls_keyhead->next;
2364           nls_keyhead->next = nk;
2365         }
2366       else
2367         nk->next = nk;
2368       nls_keyhead = nk;
2369     }
2370 #endif
2371 }
2372
2373 static void
2374 badfieldspec (const char *s)
2375 {
2376   error (SORT_FAILURE, 0, _("invalid field specification `%s'"), s);
2377 }
2378
2379 /* Handle interrupts and hangups. */
2380
2381 static void
2382 sighandler (int sig)
2383 {
2384 #ifdef SA_INTERRUPT
2385   struct sigaction sigact;
2386
2387   sigact.sa_handler = SIG_DFL;
2388   sigemptyset (&sigact.sa_mask);
2389   sigact.sa_flags = 0;
2390   sigaction (sig, &sigact, NULL);
2391 #else                           /* !SA_INTERRUPT */
2392   signal (sig, SIG_DFL);
2393 #endif                          /* SA_INTERRUPT */
2394   cleanup ();
2395   kill (getpid (), sig);
2396 }
2397
2398 /* Set the ordering options for KEY specified in S.
2399    Return the address of the first character in S that
2400    is not a valid ordering option.
2401    BLANKTYPE is the kind of blanks that 'b' should skip. */
2402
2403 static char *
2404 set_ordering (register const char *s, struct keyfield *key,
2405               enum blanktype blanktype)
2406 {
2407   while (*s)
2408     {
2409       switch (*s)
2410         {
2411         case 'b':
2412           if (blanktype == bl_start || blanktype == bl_both)
2413             key->skipsblanks = 1;
2414           if (blanktype == bl_end || blanktype == bl_both)
2415             key->skipeblanks = 1;
2416           break;
2417         case 'd':
2418           key->ignore = nondictionary;
2419           break;
2420         case 'f':
2421           key->translate = fold_toupper;
2422           break;
2423         case 'g':
2424           key->general_numeric = 1;
2425           break;
2426         case 'i':
2427           key->ignore = nonprinting;
2428           break;
2429         case 'M':
2430           key->month = 1;
2431           break;
2432         case 'n':
2433           key->numeric = 1;
2434           break;
2435         case 'r':
2436           key->reverse = 1;
2437           break;
2438         default:
2439           return (char *) s;
2440         }
2441       ++s;
2442     }
2443   return (char *) s;
2444 }
2445
2446 static void
2447 key_init (struct keyfield *key)
2448 {
2449   memset (key, 0, sizeof (*key));
2450   key->eword = -1;
2451 }
2452
2453 /* strdup and return the result of setlocale, but guard against a NULL
2454    return value.  If setlocale returns NULL, strdup FAIL_VAL instead.  */
2455
2456 #if defined ENABLE_NLS && ( !defined __GLIBC__ || __GLIBC__ < 2 )
2457 static inline char *
2458 my_setlocale (const char *locale, const char *fail_val)
2459 {
2460   char *s = setlocale (LC_ALL, locale);
2461   if (s == NULL)
2462     s = (char *) fail_val;
2463   return xstrdup (s);
2464 }
2465 #endif
2466
2467 int
2468 main (int argc, char **argv)
2469 {
2470   struct keyfield *key = NULL, gkey;
2471   char *s;
2472   int i, t, t2;
2473   int checkonly = 0, mergeonly = 0, nfiles = 0;
2474   char *minus = "-", *outfile = minus, **files, *tmp;
2475   FILE *ofp;
2476 #ifdef SA_INTERRUPT
2477   struct sigaction oldact, newact;
2478 #endif                          /* SA_INTERRUPT */
2479
2480   program_name = argv[0];
2481
2482 #ifdef ENABLE_NLS
2483
2484   /* Determine whether the current locale is C or POSIX.  */
2485 # if defined __GLIBC__ && __GLIBC__ >= 2
2486   s = setlocale (LC_ALL, "");
2487   if (s != NULL && !STREQ (s, "C") && !STREQ (s, "POSIX"))
2488     /* The current locale is neither C nor POSIX.  We'll need to do
2489        more work.  */
2490     need_locale = 1;
2491 # else
2492   {
2493     char *c_locale_string = my_setlocale ("C", "");
2494     char *posix_locale_string = my_setlocale ("POSIX", "");
2495     char *current_locale_string = setlocale (LC_ALL, "");
2496
2497     if (current_locale_string != NULL
2498         && !STREQ (current_locale_string, c_locale_string)
2499         && !STREQ (current_locale_string, posix_locale_string))
2500         /* The current locale is neither C nor POSIX.
2501            We'll need to do more work.  */
2502         need_locale = 1;
2503
2504     free (c_locale_string);
2505     free (posix_locale_string);
2506   }
2507 # endif
2508
2509   /* Let's get locale's representation of the decimal point */
2510   {
2511     struct lconv *lconvp = localeconv ();
2512
2513     decimal_point = *lconvp->decimal_point;
2514     th_sep        = *lconvp->thousands_sep;
2515     nls_grouping  =  (char *) (lconvp->grouping);
2516   }
2517
2518   /* if locale doesn't define a decimal point, we'll use the
2519      US notation.                                            */
2520   if (decimal_point == '\0')
2521     decimal_point = FLOATING_POINT;
2522   else
2523     nls_fraction_found = 0;  /* Figure out which decimal point to use  */
2524
2525   nls_month_found = 0;  /* Figure out which month notation to use */
2526
2527   monthtab = nls_monthtab;
2528
2529 #endif /* NLS */
2530
2531   bindtextdomain (PACKAGE, LOCALEDIR);
2532   textdomain (PACKAGE);
2533
2534   parse_long_options (argc, argv, PROGRAM_NAME, GNU_PACKAGE, VERSION,
2535                       AUTHORS, usage);
2536
2537   have_read_stdin = 0;
2538   inittables ();
2539
2540   temp_dir = getenv ("TMPDIR");
2541   if (temp_dir == NULL)
2542     temp_dir = DEFAULT_TMPDIR;
2543
2544   /* Change the way xmalloc and xrealloc fail.  */
2545   xalloc_exit_failure = SORT_FAILURE;
2546   xalloc_fail_func = cleanup;
2547
2548 #ifdef SA_INTERRUPT
2549   newact.sa_handler = sighandler;
2550   sigemptyset (&newact.sa_mask);
2551   newact.sa_flags = 0;
2552
2553   sigaction (SIGINT, NULL, &oldact);
2554   if (oldact.sa_handler != SIG_IGN)
2555     sigaction (SIGINT, &newact, NULL);
2556   sigaction (SIGHUP, NULL, &oldact);
2557   if (oldact.sa_handler != SIG_IGN)
2558     sigaction (SIGHUP, &newact, NULL);
2559   sigaction (SIGPIPE, NULL, &oldact);
2560   if (oldact.sa_handler != SIG_IGN)
2561     sigaction (SIGPIPE, &newact, NULL);
2562   sigaction (SIGTERM, NULL, &oldact);
2563   if (oldact.sa_handler != SIG_IGN)
2564     sigaction (SIGTERM, &newact, NULL);
2565 #else                           /* !SA_INTERRUPT */
2566   if (signal (SIGINT, SIG_IGN) != SIG_IGN)
2567     signal (SIGINT, sighandler);
2568   if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
2569     signal (SIGHUP, sighandler);
2570   if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
2571     signal (SIGPIPE, sighandler);
2572   if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
2573     signal (SIGTERM, sighandler);
2574 #endif                          /* !SA_INTERRUPT */
2575
2576   gkey.sword = gkey.eword = -1;
2577   gkey.ignore = NULL;
2578   gkey.translate = NULL;
2579   gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
2580   gkey.skipsblanks = gkey.skipeblanks = 0;
2581
2582   files = (char **) xmalloc (sizeof (char *) * argc);
2583
2584   for (i = 1; i < argc; ++i)
2585     {
2586       if (argv[i][0] == '+')
2587         {
2588           if (key)
2589             insertkey (key);
2590           key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
2591           key_init (key);
2592           s = argv[i] + 1;
2593           if (! (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1]))))
2594             badfieldspec (argv[i]);
2595           for (t = 0; ISDIGIT (*s); ++s)
2596             t = 10 * t + *s - '0';
2597           t2 = 0;
2598           if (*s == '.')
2599             for (++s; ISDIGIT (*s); ++s)
2600               t2 = 10 * t2 + *s - '0';
2601           if (t2 || t)
2602             {
2603               key->sword = t;
2604               key->schar = t2;
2605             }
2606           else
2607             key->sword = -1;
2608           s = set_ordering (s, key, bl_start);
2609           if (*s)
2610             badfieldspec (argv[i]);
2611         }
2612       else if (argv[i][0] == '-' && argv[i][1])
2613         {
2614           s = argv[i] + 1;
2615           if (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1])))
2616             {
2617               if (!key)
2618                 {
2619                   /* Provoke with `sort -9'.  */
2620                   error (0, 0, _("when using the old-style +POS and -POS \
2621 key specifiers,\nthe +POS specifier must come first"));
2622                   usage (SORT_FAILURE);
2623                 }
2624               for (t = 0; ISDIGIT (*s); ++s)
2625                 t = t * 10 + *s - '0';
2626               t2 = 0;
2627               if (*s == '.')
2628                 for (++s; ISDIGIT (*s); ++s)
2629                   t2 = t2 * 10 + *s - '0';
2630               key->eword = t;
2631               key->echar = t2;
2632               s = set_ordering (s, key, bl_end);
2633               if (*s)
2634                 badfieldspec (argv[i]);
2635               insertkey (key);
2636               key = NULL;
2637             }
2638           else
2639             while (*s)
2640               {
2641                 s = set_ordering (s, &gkey, bl_both);
2642                 switch (*s)
2643                   {
2644                   case '\0':
2645                     break;
2646                   case 'c':
2647                     checkonly = 1;
2648                     break;
2649                   case 'k':
2650                     if (s[1])
2651                       ++s;
2652                     else
2653                       {
2654                         if (i == argc - 1)
2655                           error (SORT_FAILURE, 0,
2656                                  _("option `-k' requires an argument"));
2657                         else
2658                           s = argv[++i];
2659                       }
2660                     if (key)
2661                       insertkey (key);
2662                     key = (struct keyfield *)
2663                       xmalloc (sizeof (struct keyfield));
2664                     key_init (key);
2665                     /* Get POS1. */
2666                     if (!ISDIGIT (*s))
2667                       badfieldspec (argv[i]);
2668                     for (t = 0; ISDIGIT (*s); ++s)
2669                       t = 10 * t + *s - '0';
2670                     if (t == 0)
2671                       {
2672                         /* Provoke with `sort -k0' */
2673                         error (0, 0, _("the starting field number argument \
2674 to the `-k' option must be positive"));
2675                         badfieldspec (argv[i]);
2676                       }
2677                     --t;
2678                     t2 = 0;
2679                     if (*s == '.')
2680                       {
2681                         if (!ISDIGIT (s[1]))
2682                           {
2683                             /* Provoke with `sort -k1.' */
2684                             error (0, 0, _("starting field spec has `.' but \
2685 lacks following character offset"));
2686                             badfieldspec (argv[i]);
2687                           }
2688                         for (++s; ISDIGIT (*s); ++s)
2689                           t2 = 10 * t2 + *s - '0';
2690                         if (t2 == 0)
2691                           {
2692                             /* Provoke with `sort -k1.0' */
2693                             error (0, 0, _("starting field character offset \
2694 argument to the `-k' option\nmust be positive"));
2695                             badfieldspec (argv[i]);
2696                           }
2697                         --t2;
2698                       }
2699                     if (t2 || t)
2700                       {
2701                         key->sword = t;
2702                         key->schar = t2;
2703                       }
2704                     else
2705                       key->sword = -1;
2706                     s = set_ordering (s, key, bl_start);
2707                     if (*s == 0)
2708                       {
2709                         key->eword = -1;
2710                         key->echar = 0;
2711                       }
2712                     else if (*s != ',')
2713                       badfieldspec (argv[i]);
2714                     else if (*s == ',')
2715                       {
2716                         /* Skip over comma.  */
2717                         ++s;
2718                         if (*s == 0)
2719                           {
2720                             /* Provoke with `sort -k1,' */
2721                             error (0, 0, _("field specification has `,' but \
2722 lacks following field spec"));
2723                             badfieldspec (argv[i]);
2724                           }
2725                         /* Get POS2. */
2726                         for (t = 0; ISDIGIT (*s); ++s)
2727                           t = t * 10 + *s - '0';
2728                         if (t == 0)
2729                           {
2730                             /* Provoke with `sort -k1,0' */
2731                             error (0, 0, _("ending field number argument \
2732 to the `-k' option must be positive"));
2733                             badfieldspec (argv[i]);
2734                           }
2735                         --t;
2736                         t2 = 0;
2737                         if (*s == '.')
2738                           {
2739                             if (!ISDIGIT (s[1]))
2740                               {
2741                                 /* Provoke with `sort -k1,1.' */
2742                                 error (0, 0, _("ending field spec has `.' \
2743 but lacks following character offset"));
2744                                 badfieldspec (argv[i]);
2745                               }
2746                             for (++s; ISDIGIT (*s); ++s)
2747                               t2 = t2 * 10 + *s - '0';
2748                           }
2749                         else
2750                           {
2751                             /* `-k 2,3' is equivalent to `+1 -3'.  */
2752                             ++t;
2753                           }
2754                         key->eword = t;
2755                         key->echar = t2;
2756                         s = set_ordering (s, key, bl_end);
2757                         if (*s)
2758                           badfieldspec (argv[i]);
2759                       }
2760                     insertkey (key);
2761                     key = NULL;
2762                     goto outer;
2763                   case 'm':
2764                     mergeonly = 1;
2765                     break;
2766                   case 'o':
2767                     if (s[1])
2768                       outfile = s + 1;
2769                     else
2770                       {
2771                         if (i == argc - 1)
2772                           error (SORT_FAILURE, 0,
2773                                  _("option `-o' requires an argument"));
2774                         else
2775                           outfile = argv[++i];
2776                       }
2777                     goto outer;
2778                   case 's':
2779                     stable = 1;
2780                     break;
2781                   case 't':
2782                     if (s[1])
2783                       tab = *++s;
2784                     else if (i < argc - 1)
2785                       {
2786                         tab = *argv[++i];
2787                         goto outer;
2788                       }
2789                     else
2790                       error (SORT_FAILURE, 0,
2791                              _("option `-t' requires an argument"));
2792                     break;
2793                   case 'T':
2794                     if (s[1])
2795                       temp_dir = ++s;
2796                     else
2797                       {
2798                         if (i < argc - 1)
2799                           temp_dir = argv[++i];
2800                         else
2801                           error (SORT_FAILURE, 0,
2802                                  _("option `-T' requires an argument"));
2803                       }
2804                     goto outer;
2805                     /* break; */
2806                   case 'u':
2807                     unique = 1;
2808                     break;
2809                   case 'z':
2810                     eolchar = 0;
2811                     break;
2812                   case 'y':
2813                     /* Accept and ignore e.g. -y0 for compatibility with
2814                        Solaris 2.  */
2815                     goto outer;
2816                   default:
2817                     fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2818                              argv[0], *s);
2819                     usage (SORT_FAILURE);
2820                   }
2821                 if (*s)
2822                   ++s;
2823               }
2824         }
2825       else                      /* Not an option. */
2826         {
2827           files[nfiles++] = argv[i];
2828         }
2829     outer:;
2830     }
2831
2832   if (key)
2833     insertkey (key);
2834
2835   /* Inheritance of global options to individual keys. */
2836   for (key = keyhead.next; key; key = key->next)
2837     if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2838         && !key->skipeblanks && !key->month && !key->numeric
2839         && !key->general_numeric)
2840       {
2841         key->ignore = gkey.ignore;
2842         key->translate = gkey.translate;
2843         key->skipsblanks = gkey.skipsblanks;
2844         key->skipeblanks = gkey.skipeblanks;
2845         key->month = gkey.month;
2846         key->numeric = gkey.numeric;
2847         key->general_numeric = gkey.general_numeric;
2848         key->reverse = gkey.reverse;
2849       }
2850
2851   if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2852                         || gkey.skipeblanks || gkey.month || gkey.numeric
2853                         || gkey.general_numeric))
2854     insertkey (&gkey);
2855   reverse = gkey.reverse;
2856
2857   if (nfiles == 0)
2858     {
2859       nfiles = 1;
2860       files = &minus;
2861     }
2862
2863   if (checkonly)
2864     {
2865       /* POSIX requires that sort return 1 IFF invoked with -c and the
2866          input is not properly sorted.  */
2867       exit (check (files, nfiles) == 0 ? 0 : 1);
2868     }
2869
2870   if (!STREQ (outfile, "-"))
2871     {
2872       struct stat outstat;
2873       if (stat (outfile, &outstat) == 0)
2874         {
2875           /* The following code prevents a race condition when
2876              people use the brain dead shell programming idiom:
2877                   cat file | sort -o file
2878              This feature is provided for historical compatibility,
2879              but we strongly discourage ever relying on this in
2880              new shell programs. */
2881
2882           /* Temporarily copy each input file that might be another name
2883              for the output file.  When in doubt (e.g. a pipe), copy.  */
2884           for (i = 0; i < nfiles; ++i)
2885             {
2886               char buf[8192];
2887               FILE *in_fp;
2888               FILE *out_fp;
2889               int cc;
2890
2891               if (S_ISREG (outstat.st_mode) && !STREQ (outfile, files[i]))
2892                 {
2893                   struct stat instat;
2894                   if ((STREQ (files[i], "-")
2895                        ? fstat (STDIN_FILENO, &instat)
2896                        : stat (files[i], &instat)) != 0)
2897                     {
2898                       error (0, errno, "%s", files[i]);
2899                       cleanup ();
2900                       exit (SORT_FAILURE);
2901                     }
2902                   if (S_ISREG (instat.st_mode) && !SAME_INODE (instat, outstat))
2903                     {
2904                       /* We know the files are distinct.  */
2905                       continue;
2906                     }
2907                 }
2908
2909               in_fp = xfopen (files[i], "r");
2910               tmp = tempname ();
2911               out_fp = xtmpfopen (tmp);
2912               /* FIXME: maybe use copy.c(copy) here. */
2913               while ((cc = fread (buf, 1, sizeof buf, in_fp)) > 0)
2914                 write_bytes (buf, cc, out_fp);
2915               if (ferror (in_fp))
2916                 {
2917                   error (0, errno, "%s", files[i]);
2918                   cleanup ();
2919                   exit (SORT_FAILURE);
2920                 }
2921               xfclose (out_fp);
2922               xfclose (in_fp);
2923               files[i] = tmp;
2924             }
2925           ofp = xfopen (outfile, "w");
2926         }
2927       else
2928         {
2929           /* A non-`-' outfile was specified, but the file doesn't yet exist.
2930              Before opening it for writing (thus creating it), make sure all
2931              of the input files exist.  Otherwise, creating the output file
2932              could create an otherwise missing input file, making sort succeed
2933              when it should fail.  */
2934           for (i = 0; i < nfiles; ++i)
2935             {
2936               struct stat sb;
2937               if (STREQ (files[i], "-"))
2938                 continue;
2939               if (stat (files[i], &sb))
2940                 {
2941                   error (0, errno, "%s", files[i]);
2942                   cleanup ();
2943                   exit (SORT_FAILURE);
2944                 }
2945             }
2946
2947           ofp = xfopen (outfile, "w");
2948         }
2949     }
2950   else
2951     {
2952       ofp = stdout;
2953     }
2954
2955   if (mergeonly)
2956     merge (files, nfiles, ofp);
2957   else
2958     sort (files, nfiles, ofp);
2959   cleanup ();
2960
2961   /* If we wait for the implicit flush on exit, and the parent process
2962      has closed stdout (e.g., exec >&- in a shell), then the output file
2963      winds up empty.  I don't understand why.  This is under SunOS,
2964      Solaris, Ultrix, and Irix.  This premature fflush makes the output
2965      reappear. --karl@cs.umb.edu  */
2966   if (fflush (ofp) < 0)
2967     error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2968
2969   if (have_read_stdin && fclose (stdin) == EOF)
2970     error (SORT_FAILURE, errno, "%s", outfile);
2971   if (ferror (stdout) || fclose (stdout) == EOF)
2972     error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2973
2974   exit (EXIT_SUCCESS);
2975 }