src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 88, 1991-2002 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software
  16    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17
  18    Written December 1988 by Mike Haertel.
  19    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  20    or (US mail) as Mike Haertel c/o Free Software Foundation.
  21
  22    Ørn E. Hansen added NLS support in 1997.  */
  23
  24 #include <config.h>
  25
  26 #include <getopt.h>
  27 #include <sys/types.h>
  28 #include <signal.h>
  29 #include <stdio.h>
  30 #include <assert.h>
  31 #include "system.h"
  32 #include "long-options.h"
  33 #include "error.h"
  34 #include "hard-locale.h"
  35 #include "human.h"
  36 #include "physmem.h"
  37 #include "posixver.h"
  38 #include "stdio-safer.h"
  39 #include "xmemcoll.h"
  40 #include "xstrtol.h"
  41
  42 #if HAVE_SYS_RESOURCE_H
  43 # include <sys/resource.h>
  44 #endif
  45 #ifndef RLIMIT_DATA
  46 struct rlimit { size_t rlim_cur; };
  47 # define getrlimit(Resource, Rlp) (-1)
  48 #endif
  49
  50 /* The official name of this program (e.g., no `g' prefix).  */
  51 #define PROGRAM_NAME "sort"
  52
  53 #define AUTHORS N_ ("Mike Haertel and Paul Eggert")
  54
  55 #if HAVE_LANGINFO_H
  56 # include <langinfo.h>
  57 #endif
  58
  59 #ifndef SA_NOCLDSTOP
  60 # define sigprocmask(How, Set, Oset) /* empty */
  61 # define sigset_t int
  62 #endif
  63
  64 #ifndef STDC_HEADERS
  65 double strtod ();
  66 #endif
  67
  68 /* Undefine, to avoid warning about redefinition on some systems.  */
  69 /* FIXME: Remove these: use MIN/MAX from sys2.h.  */
  70 #undef min
  71 #define min(a, b) ((a) < (b) ? (a) : (b))
  72 #undef max
  73 #define max(a, b) ((a) > (b) ? (a) : (b))
  74
  75 #define UCHAR_LIM (UCHAR_MAX + 1)
  76 #define UCHAR(c) ((unsigned char) (c))
  77
  78 #ifndef DEFAULT_TMPDIR
  79 # define DEFAULT_TMPDIR "/tmp"
  80 #endif
  81
  82 /* Use this as exit status in case of error, not EXIT_FAILURE.  This
  83    is necessary because EXIT_FAILURE is usually 1 and POSIX requires
  84    that sort exit with status 1 IFF invoked with -c and the input is
  85    not properly sorted.  Any other irregular exit must exit with a
  86    status code greater than 1.  */
  87 #define SORT_FAILURE 2
  88 #define SORT_OUT_OF_ORDER 1
  89
  90 #define C_DECIMAL_POINT '.'
  91 #define NEGATION_SIGN   '-'
  92 #define NUMERIC_ZERO    '0'
  93
  94 #if HAVE_SETLOCALE
  95
  96 static char decimal_point;
  97 static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
  98
  99 /* Nonzero if the corresponding locales are hard.  */
 100 static int hard_LC_COLLATE;
 101 # if HAVE_NL_LANGINFO
 102 static int hard_LC_TIME;
 103 # endif
 104
 105 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)
 106
 107 #else
 108
 109 # define decimal_point C_DECIMAL_POINT
 110 # define IS_THOUSANDS_SEP(x) 0
 111
 112 #endif
 113
 114 #define NONZERO(x) (x != 0)
 115
 116 /* The kind of blanks for '-b' to skip in various options. */
 117 enum blanktype { bl_start, bl_end, bl_both };
 118
 119 /* The character marking end of line. Default to \n. */
 120 static int eolchar = '\n';
 121
 122 /* Lines are held in core as counted strings. */
 123 struct line
 124 {
 125   char *text;                   /* Text of the line. */
 126   size_t length;                /* Length including final newline. */
 127   char *keybeg;                 /* Start of first key. */
 128   char *keylim;                 /* Limit of first key. */
 129 };
 130
 131 /* Input buffers. */
 132 struct buffer
 133 {
 134   char *buf;                    /* Dynamically allocated buffer,
 135                                    partitioned into 3 regions:
 136                                    - input data;
 137                                    - unused area;
 138                                    - an array of lines, in reverse order.  */
 139   size_t used;                  /* Number of bytes used for input data.  */
 140   size_t nlines;                /* Number of lines in the line array.  */
 141   size_t alloc;                 /* Number of bytes allocated. */
 142   size_t left;                  /* Number of bytes left from previous reads. */
 143   size_t line_bytes;            /* Number of bytes to reserve for each line. */
 144   int eof;                      /* An EOF has been read.  */
 145 };
 146
 147 struct keyfield
 148 {
 149   size_t sword;                 /* Zero-origin 'word' to start at. */
 150   size_t schar;                 /* Additional characters to skip. */
 151   int skipsblanks;              /* Skip leading white space at start. */
 152   size_t eword;                 /* Zero-origin first word after field. */
 153   size_t echar;                 /* Additional characters in field. */
 154   int skipeblanks;              /* Skip trailing white space at finish. */
 155   int *ignore;                  /* Boolean array of characters to ignore. */
 156   char *translate;              /* Translation applied to characters. */
 157   int numeric;                  /* Flag for numeric comparison.  Handle
 158                                    strings of digits with optional decimal
 159                                    point, but no exponential notation. */
 160   int general_numeric;          /* Flag for general, numeric comparison.
 161                                    Handle numbers in exponential notation. */
 162   int month;                    /* Flag for comparison by month name. */
 163   int reverse;                  /* Reverse the sense of comparison. */
 164   struct keyfield *next;        /* Next keyfield to try. */
 165 };
 166
 167 struct month
 168 {
 169   char *name;
 170   int val;
 171 };
 172
 173 /* The name this program was run with. */
 174 char *program_name;
 175
 176 /* FIXME: None of these tables work with multibyte character sets.
 177    Also, there are many other bugs when handling multibyte characters,
 178    or even unibyte encodings where line boundaries are not in the
 179    initial shift state.  One way to fix this is to rewrite `sort' to
 180    use wide characters internally, but doing this with good
 181    performance is a bit tricky.  */
 182
 183 /* Table of white space. */
 184 static int blanks[UCHAR_LIM];
 185
 186 /* Table of non-printing characters. */
 187 static int nonprinting[UCHAR_LIM];
 188
 189 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 190 static int nondictionary[UCHAR_LIM];
 191
 192 /* Translation table folding lower case to upper.  */
 193 static char fold_toupper[UCHAR_LIM];
 194
 195 #define MONTHS_PER_YEAR 12
 196
 197 /* Table mapping month names to integers.
 198    Alphabetic order allows binary search. */
 199 static struct month monthtab[] =
 200 {
 201   {"APR", 4},
 202   {"AUG", 8},
 203   {"DEC", 12},
 204   {"FEB", 2},
 205   {"JAN", 1},
 206   {"JUL", 7},
 207   {"JUN", 6},
 208   {"MAR", 3},
 209   {"MAY", 5},
 210   {"NOV", 11},
 211   {"OCT", 10},
 212   {"SEP", 9}
 213 };
 214
 215 /* During the merge phase, the number of files to merge at once. */
 216 #define NMERGE 16
 217
 218 /* Minimum size for a merge or check buffer.  */
 219 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
 220
 221 /* Minimum sort size; the code might not work with smaller sizes.  */
 222 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
 223
 224 /* The number of bytes needed for a merge or check buffer, which can
 225    function relatively efficiently even if it holds only one line.  If
 226    a longer line is seen, this value is increased.  */
 227 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
 228
 229 /* The approximate maximum number of bytes of main memory to use, as
 230    specified by the user.  Zero if the user has not specified a size.  */
 231 static size_t sort_size;
 232
 233 /* The guessed size for non-regular files.  */
 234 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
 235
 236 /* Array of directory names in which any temporary files are to be created. */
 237 static char const **temp_dirs;
 238
 239 /* Number of temporary directory names used.  */
 240 static size_t temp_dir_count;
 241
 242 /* Number of allocated slots in temp_dirs.  */
 243 static size_t temp_dir_alloc;
 244
 245 /* Our process ID.  */
 246 static pid_t process_id;
 247
 248 /* Flag to reverse the order of all comparisons. */
 249 static int reverse;
 250
 251 /* Flag for stable sort.  This turns off the last ditch bytewise
 252    comparison of lines, and instead leaves lines in the same order
 253    they were read if all keys compare equal.  */
 254 static int stable;
 255
 256 /* Tab character separating fields.  If NUL, then fields are separated
 257    by the empty string between a non-whitespace character and a whitespace
 258    character. */
 259 static char tab;
 260
 261 /* Flag to remove consecutive duplicate lines from the output.
 262    Only the last of a sequence of equal lines will be output. */
 263 static int unique;
 264
 265 /* Nonzero if any of the input files are the standard input. */
 266 static int have_read_stdin;
 267
 268 /* List of key field comparisons to be tried.  */
 269 static struct keyfield *keylist;
 270
 271 void
 272 usage (int status)
 273 {
 274   if (status != 0)
 275     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 276              program_name);
 277   else
 278     {
 279       printf (_("\
 280 Usage: %s [OPTION]... [FILE]...\n\
 281 "),
 282               program_name);
 283       fputs (_("\
 284 Write sorted concatenation of all FILE(s) to standard output.\n\
 285 \n\
 286 Ordering options:\n\
 287 \n\
 288 "), stdout);
 289       fputs (_("\
 290 Mandatory arguments to long options are mandatory for short options too.\n\
 291 "), stdout);
 292       fputs (_("\
 293   -b, --ignore-leading-blanks ignore leading blanks\n\
 294   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
 295   -f, --ignore-case           fold lower case to upper case characters\n\
 296 "), stdout);
 297       fputs (_("\
 298   -g, --general-numeric-sort  compare according to general numerical value\n\
 299   -i, --ignore-nonprinting    consider only printable characters\n\
 300   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 301   -n, --numeric-sort          compare according to string numerical value\n\
 302   -r, --reverse               reverse the result of comparisons\n\
 303 \n\
 304 "), stdout);
 305       fputs (_("\
 306 Other options:\n\
 307 \n\
 308   -c, --check               check whether input is sorted; do not sort\n\
 309   -k, --key=POS1[,POS2]     start a key at POS1, end it at POS 2 (origin 1)\n\
 310   -m, --merge               merge already sorted files; do not sort\n\
 311   -o, --output=FILE         write result to FILE instead of standard output\n\
 312   -s, --stable              stabilize sort by disabling last-resort comparison\n\
 313   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
 314 "), stdout);
 315       printf (_("\
 316   -t, --field-separator=SEP use SEP instead of non- to whitespace transition\n\
 317   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s\n\
 318                               multiple options specify multiple directories\n\
 319   -u, --unique              with -c: check for strict ordering\n\
 320                               otherwise: output only the first of an equal run\n\
 321 "), DEFAULT_TMPDIR);
 322       fputs (_("\
 323   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 324 "), stdout);
 325       fputs (HELP_OPTION_DESCRIPTION, stdout);
 326       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 327       fputs (_("\
 328 \n\
 329 POS is F[.C][OPTS], where F is the field number and C the character position\n\
 330 in the field.  OPTS is one or more single-letter ordering options, which\n\
 331 override global ordering options for that key.  If no key is given, use the\n\
 332 entire line as the key.\n\
 333 \n\
 334 SIZE may be followed by the following multiplicative suffixes:\n\
 335 "), stdout);
 336       fputs (_("\
 337 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
 338 \n\
 339 With no FILE, or when FILE is -, read standard input.\n\
 340 \n\
 341 *** WARNING ***\n\
 342 The locale specified by the environment affects sort order.\n\
 343 Set LC_ALL=C to get the traditional sort order that uses\n\
 344 native byte values.\n\
 345 "), stdout );
 346       printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
 347     }
 348   /* Don't use EXIT_FAILURE here in case it is defined to be 1.
 349      POSIX requires that sort return 1 IFF invoked with -c and
 350      the input is not properly sorted.  */
 351   assert (status == 0 || status == SORT_FAILURE);
 352   exit (status);
 353 }
 354
 355 #define COMMON_SHORT_OPTIONS "-bcdfgik:mMno:rsS:t:T:uz"
 356
 357 static struct option const long_options[] =
 358 {
 359   {"ignore-leading-blanks", no_argument, NULL, 'b'},
 360   {"check", no_argument, NULL, 'c'},
 361   {"dictionary-order", no_argument, NULL, 'd'},
 362   {"ignore-case", no_argument, NULL, 'f'},
 363   {"general-numeric-sort", no_argument, NULL, 'g'},
 364   {"ignore-nonprinting", no_argument, NULL, 'i'},
 365   {"key", required_argument, NULL, 'k'},
 366   {"merge", no_argument, NULL, 'm'},
 367   {"month-sort", no_argument, NULL, 'M'},
 368   {"numeric-sort", no_argument, NULL, 'n'},
 369   {"output", required_argument, NULL, 'o'},
 370   {"reverse", no_argument, NULL, 'r'},
 371   {"stable", no_argument, NULL, 's'},
 372   {"buffer-size", required_argument, NULL, 'S'},
 373   {"field-separator", required_argument, NULL, 't'},
 374   {"temporary-directory", required_argument, NULL, 'T'},
 375   {"unique", no_argument, NULL, 'u'},
 376   {"zero-terminated", no_argument, NULL, 'z'},
 377   {GETOPT_HELP_OPTION_DECL},
 378   {GETOPT_VERSION_OPTION_DECL},
 379   {0, 0, 0, 0},
 380 };
 381
 382 /* The set of signals that are caught.  */
 383 static sigset_t caught_signals;
 384
 385 /* The list of temporary files. */
 386 struct tempnode
 387 {
 388   struct tempnode *volatile next;
 389   char name[1];  /* Actual size is 1 + file name length.  */
 390 };
 391 static struct tempnode *volatile temphead;
 392
 393 /* Clean up any remaining temporary files. */
 394
 395 static void
 396 cleanup (void)
 397 {
 398   struct tempnode *node;
 399
 400   for (node = temphead; node; node = node->next)
 401     unlink (node->name);
 402 }
 403
 404 /* Report MESSAGE for FILE, then clean up and exit.  */
 405
 406 static void die PARAMS ((char const *, char const *)) ATTRIBUTE_NORETURN;
 407 static void
 408 die (char const *message, char const *file)
 409 {
 410   error (0, errno, "%s: %s", message, file);
 411   exit (SORT_FAILURE);
 412 }
 413
 414 /* Create a new temporary file, returning its newly allocated name.
 415    Store into *PFP a stream open for writing.  */
 416
 417 static char *
 418 create_temp_file (FILE **pfp)
 419 {
 420   static char const slashbase[] = "/sortXXXXXX";
 421   static size_t temp_dir_index;
 422   sigset_t oldset;
 423   int fd;
 424   int saved_errno;
 425   char const *temp_dir = temp_dirs[temp_dir_index];
 426   size_t len = strlen (temp_dir);
 427   struct tempnode *node =
 428     (struct tempnode *) xmalloc (sizeof node->next + len + sizeof slashbase);
 429   char *file = node->name;
 430
 431   memcpy (file, temp_dir, len);
 432   memcpy (file + len, slashbase, sizeof slashbase);
 433   node->next = temphead;
 434   if (++temp_dir_index == temp_dir_count)
 435     temp_dir_index = 0;
 436
 437   /* Create the temporary file in a critical section, to avoid races.  */
 438   sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
 439   fd = mkstemp (file);
 440   if (0 <= fd)
 441     temphead = node;
 442   saved_errno = errno;
 443   sigprocmask (SIG_SETMASK, &oldset, NULL);
 444   errno = saved_errno;
 445
 446   if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
 447     die (_("cannot create temporary file"), file);
 448
 449   return file;
 450 }
 451
 452 static FILE *
 453 xfopen (const char *file, const char *how)
 454 {
 455   FILE *fp;
 456
 457   if (STREQ (file, "-"))
 458     {
 459       if (*how == 'r')
 460         {
 461           have_read_stdin = 1;
 462           fp = stdin;
 463         }
 464       else
 465         fp = stdout;
 466     }
 467   else
 468     {
 469       if ((fp = fopen_safer (file, how)) == NULL)
 470         die (_("open failed"), file);
 471     }
 472
 473   return fp;
 474 }
 475
 476 /* Close FP, whose name is FILE, and report any errors.  */
 477
 478 static void
 479 xfclose (FILE *fp, char const *file)
 480 {
 481   if (fp == stdin)
 482     {
 483       /* Allow reading stdin from tty more than once. */
 484       if (feof (fp))
 485         clearerr (fp);
 486     }
 487   else
 488     {
 489       if (fclose (fp) != 0)
 490         die (_("close failed"), file);
 491     }
 492 }
 493
 494 static void
 495 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
 496 {
 497   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
 498     die (_("write failed"), output_file);
 499 }
 500
 501 /* Append DIR to the array of temporary directory names.  */
 502 static void
 503 add_temp_dir (char const *dir)
 504 {
 505   if (temp_dir_count == temp_dir_alloc)
 506     {
 507       temp_dir_alloc = temp_dir_alloc ? temp_dir_alloc * 2 : 16;
 508       temp_dirs = xrealloc (temp_dirs, sizeof (temp_dirs) * temp_dir_alloc);
 509     }
 510
 511   temp_dirs[temp_dir_count++] = dir;
 512 }
 513
 514 /* Search through the list of temporary files for NAME;
 515    remove it if it is found on the list. */
 516
 517 static void
 518 zaptemp (const char *name)
 519 {
 520   struct tempnode *volatile *pnode;
 521   struct tempnode *node;
 522
 523   for (pnode = &temphead; (node = *pnode); pnode = &node->next)
 524     if (node->name == name)
 525       {
 526         unlink (name);
 527         *pnode = node->next;
 528         free ((char *) node);
 529         break;
 530       }
 531 }
 532
 533 #if HAVE_NL_LANGINFO
 534
 535 static int
 536 struct_month_cmp (const void *m1, const void *m2)
 537 {
 538   return strcmp (((const struct month *) m1)->name,
 539                  ((const struct month *) m2)->name);
 540 }
 541
 542 #endif
 543
 544 /* Initialize the character class tables. */
 545
 546 static void
 547 inittables (void)
 548 {
 549   int i;
 550
 551   for (i = 0; i < UCHAR_LIM; ++i)
 552     {
 553       if (ISBLANK (i))
 554         blanks[i] = 1;
 555       if (!ISPRINT (i))
 556         nonprinting[i] = 1;
 557       if (!ISALNUM (i) && !ISBLANK (i))
 558         nondictionary[i] = 1;
 559       if (ISLOWER (i))
 560         fold_toupper[i] = toupper (i);
 561       else
 562         fold_toupper[i] = i;
 563     }
 564
 565 #if HAVE_NL_LANGINFO
 566   /* If we're not in the "C" locale, read different names for months.  */
 567   if (hard_LC_TIME)
 568     {
 569       for (i = 0; i < MONTHS_PER_YEAR; i++)
 570         {
 571           char *s;
 572           size_t s_len;
 573           size_t j;
 574           char *name;
 575
 576           s = (char *) nl_langinfo (ABMON_1 + i);
 577           s_len = strlen (s);
 578           monthtab[i].name = name = (char *) xmalloc (s_len + 1);
 579           monthtab[i].val = i + 1;
 580
 581           for (j = 0; j < s_len; j++)
 582             name[j] = fold_toupper[UCHAR (s[j])];
 583           name[j] = '\0';
 584         }
 585       qsort ((void *) monthtab, MONTHS_PER_YEAR,
 586              sizeof (struct month), struct_month_cmp);
 587     }
 588 #endif
 589 }
 590
 591 /* Specify the amount of main memory to use when sorting.  */
 592 static void
 593 specify_sort_size (char const *s)
 594 {
 595   uintmax_t n;
 596   char *suffix;
 597   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
 598
 599   /* The default unit is KiB.  */
 600   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
 601     {
 602       if (n <= UINTMAX_MAX / 1024)
 603         n *= 1024;
 604       else
 605         e = LONGINT_OVERFLOW;
 606     }
 607
 608   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
 609   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
 610     switch (suffix[0])
 611       {
 612       case 'b':
 613         e = LONGINT_OK;
 614         break;
 615
 616       case '%':
 617         {
 618           double mem = physmem_total () * n / 100;
 619
 620           /* Use "<", not "<=", to avoid problems with rounding.  */
 621           if (mem < UINTMAX_MAX)
 622             {
 623               n = mem;
 624               e = LONGINT_OK;
 625             }
 626           else
 627             e = LONGINT_OVERFLOW;
 628         }
 629         break;
 630       }
 631
 632   if (e == LONGINT_OK)
 633     {
 634       sort_size = n;
 635       if (sort_size == n)
 636         {
 637           sort_size = MAX (sort_size, MIN_SORT_SIZE);
 638           return;
 639         }
 640
 641       e = LONGINT_OVERFLOW;
 642     }
 643
 644   STRTOL_FATAL_ERROR (s, _("sort size"), e);
 645 }
 646
 647 /* Return the default sort size.  */
 648 static size_t
 649 default_sort_size (void)
 650 {
 651   /* Let MEM be available memory or 1/8 of total memory, whichever
 652      is greater.  */
 653   double avail = physmem_available ();
 654   double total = physmem_total ();
 655   double mem = MAX (avail, total / 8);
 656   struct rlimit rlimit;
 657
 658   /* Let SIZE be MEM, but no more than the maximum object size or
 659      system resource limits.  Avoid the MIN macro here, as it is not
 660      quite right when only one argument is floating point.  Don't
 661      bother to check for values like RLIM_INFINITY since in practice
 662      they are not much less than SIZE_MAX.  */
 663   size_t size = SIZE_MAX;
 664   if (mem < size)
 665     size = mem;
 666   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
 667     size = rlimit.rlim_cur;
 668 #ifdef RLIMIT_AS
 669   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
 670     size = rlimit.rlim_cur;
 671 #endif
 672
 673   /* Leave a large safety margin for the above limits, as failure can
 674      occur when they are exceeded.  */
 675   size /= 2;
 676
 677 #ifdef RLIMIT_RSS
 678   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
 679      Exceeding RSS is not fatal, but can be quite slow.  */
 680   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
 681     size = rlimit.rlim_cur / 16 * 15;
 682 #endif
 683
 684   /* Use no less than the minimum.  */
 685   return MAX (size, MIN_SORT_SIZE);
 686 }
 687
 688 /* Return the sort buffer size to use with the input files identified
 689    by FPS and FILES, which are alternate paths to the same files.
 690    NFILES gives the number of input files; NFPS may be less.  Assume
 691    that each input line requires LINE_BYTES extra bytes' worth of line
 692    information.  Return at most SIZE_BOUND.  */
 693
 694 static size_t
 695 sort_buffer_size (FILE *const *fps, int nfps,
 696                   char *const *files, int nfiles,
 697                   size_t line_bytes, size_t size_bound)
 698 {
 699   /* In the worst case, each input byte is a newline.  */
 700   size_t worst_case_per_input_byte = line_bytes + 1;
 701
 702   /* Keep enough room for one extra input line and an extra byte.
 703      This extra room might be needed when preparing to read EOF.  */
 704   size_t size = worst_case_per_input_byte + 1;
 705
 706   int i;
 707
 708   for (i = 0; i < nfiles; i++)
 709     {
 710       struct stat st;
 711       off_t file_size;
 712       size_t worst_case;
 713
 714       if ((i < nfps ? fstat (fileno (fps[i]), &st)
 715            : strcmp (files[i], "-") == 0 ? fstat (STDIN_FILENO, &st)
 716            : stat (files[i], &st))
 717           != 0)
 718         die (_("stat failed"), files[i]);
 719
 720       file_size = S_ISREG (st.st_mode) ? st.st_size : INPUT_FILE_SIZE_GUESS;
 721
 722       /* Add the amount of memory needed to represent the worst case
 723          where the input consists entirely of newlines followed by a
 724          single non-newline.  Check for overflow.  */
 725       worst_case = file_size * worst_case_per_input_byte + 1;
 726       if (file_size != worst_case / worst_case_per_input_byte
 727           || size_bound - size <= worst_case)
 728         return size_bound;
 729       size += worst_case;
 730     }
 731
 732   return size;
 733 }
 734
 735 /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
 736    must be at least sizeof (struct line).  Allocate ALLOC bytes
 737    initially.  */
 738
 739 static void
 740 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
 741 {
 742   /* Ensure that the line array is properly aligned.  If the desired
 743      size cannot be allocated, repeatedly halve it until allocation
 744      succeeds.  The smaller allocation may hurt overall performance,
 745      but that's better than failing.  */
 746   for (;;)
 747     {
 748       alloc += sizeof (struct line) - alloc % sizeof (struct line);
 749       buf->buf = malloc (alloc);
 750       if (buf->buf)
 751         break;
 752       alloc /= 2;
 753       if (alloc <= line_bytes + 1)
 754         xalloc_die ();
 755     }
 756
 757   buf->line_bytes = line_bytes;
 758   buf->alloc = alloc;
 759   buf->used = buf->left = buf->nlines = 0;
 760   buf->eof = 0;
 761 }
 762
 763 /* Return one past the limit of the line array.  */
 764
 765 static inline struct line *
 766 buffer_linelim (struct buffer const *buf)
 767 {
 768   return (struct line *) (buf->buf + buf->alloc);
 769 }
 770
 771 /* Return a pointer to the first character of the field specified
 772    by KEY in LINE. */
 773
 774 static char *
 775 begfield (const struct line *line, const struct keyfield *key)
 776 {
 777   register char *ptr = line->text, *lim = ptr + line->length - 1;
 778   register size_t sword = key->sword;
 779   register size_t schar = key->schar;
 780
 781   if (tab)
 782     while (ptr < lim && sword--)
 783       {
 784         while (ptr < lim && *ptr != tab)
 785           ++ptr;
 786         if (ptr < lim)
 787           ++ptr;
 788       }
 789   else
 790     while (ptr < lim && sword--)
 791       {
 792         while (ptr < lim && blanks[UCHAR (*ptr)])
 793           ++ptr;
 794         while (ptr < lim && !blanks[UCHAR (*ptr)])
 795           ++ptr;
 796       }
 797
 798   if (key->skipsblanks)
 799     while (ptr < lim && blanks[UCHAR (*ptr)])
 800       ++ptr;
 801
 802   if (schar < lim - ptr)
 803     ptr += schar;
 804   else
 805     ptr = lim;
 806
 807   return ptr;
 808 }
 809
 810 /* Return the limit of (a pointer to the first character after) the field
 811    in LINE specified by KEY. */
 812
 813 static char *
 814 limfield (const struct line *line, const struct keyfield *key)
 815 {
 816   register char *ptr = line->text, *lim = ptr + line->length - 1;
 817   register size_t eword = key->eword, echar = key->echar;
 818
 819   /* Note: from the POSIX spec:
 820      The leading field separator itself is included in
 821      a field when -t is not used.  FIXME: move this comment up... */
 822
 823   /* Move PTR past EWORD fields or to one past the last byte on LINE,
 824      whichever comes first.  If there are more than EWORD fields, leave
 825      PTR pointing at the beginning of the field having zero-based index,
 826      EWORD.  If a delimiter character was specified (via -t), then that
 827      `beginning' is the first character following the delimiting TAB.
 828      Otherwise, leave PTR pointing at the first `blank' character after
 829      the preceding field.  */
 830   if (tab)
 831     while (ptr < lim && eword--)
 832       {
 833         while (ptr < lim && *ptr != tab)
 834           ++ptr;
 835         if (ptr < lim && (eword | echar))
 836           ++ptr;
 837       }
 838   else
 839     while (ptr < lim && eword--)
 840       {
 841         while (ptr < lim && blanks[UCHAR (*ptr)])
 842           ++ptr;
 843         while (ptr < lim && !blanks[UCHAR (*ptr)])
 844           ++ptr;
 845       }
 846
 847 #ifdef POSIX_UNSPECIFIED
 848   /* The following block of code makes GNU sort incompatible with
 849      standard Unix sort, so it's ifdef'd out for now.
 850      The POSIX spec isn't clear on how to interpret this.
 851      FIXME: request clarification.
 852
 853      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
 854      Date: Thu, 30 May 96 12:20:41 -0400
 855      [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
 856
 857      [...]I believe I've found another bug in `sort'.
 858
 859      $ cat /tmp/sort.in
 860      a b c 2 d
 861      pq rs 1 t
 862      $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
 863      a b c 2 d
 864      pq rs 1 t
 865      $ /bin/sort -k1.7,1.7 </tmp/sort.in
 866      pq rs 1 t
 867      a b c 2 d
 868
 869      Unix sort produced the answer I expected: sort on the single character
 870      in column 7.  GNU sort produced different results, because it disagrees
 871      on the interpretation of the key-end spec "M.N".  Unix sort reads this
 872      as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
 873      "skip M-1 fields, then either N-1 characters or the rest of the current
 874      field, whichever comes first".  This extra clause applies only to
 875      key-ends, not key-starts.
 876      */
 877
 878   /* Make LIM point to the end of (one byte past) the current field.  */
 879   if (tab)
 880     {
 881       char *newlim;
 882       newlim = memchr (ptr, tab, lim - ptr);
 883       if (newlim)
 884         lim = newlim;
 885     }
 886   else
 887     {
 888       char *newlim;
 889       newlim = ptr;
 890       while (newlim < lim && blanks[UCHAR (*newlim)])
 891         ++newlim;
 892       while (newlim < lim && !blanks[UCHAR (*newlim)])
 893         ++newlim;
 894       lim = newlim;
 895     }
 896 #endif
 897
 898   /* If we're skipping leading blanks, don't start counting characters
 899      until after skipping past any leading blanks.  */
 900   if (key->skipsblanks)
 901     while (ptr < lim && blanks[UCHAR (*ptr)])
 902       ++ptr;
 903
 904   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
 905   if (echar < lim - ptr)
 906     ptr += echar;
 907   else
 908     ptr = lim;
 909
 910   return ptr;
 911 }
 912
 913 /* FIXME */
 914
 915 static void
 916 trim_trailing_blanks (const char *a_start, char **a_end)
 917 {
 918   while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
 919     --(*a_end);
 920 }
 921
 922 /* Fill BUF reading from FP, moving buf->left bytes from the end
 923    of buf->buf to the beginning first.  If EOF is reached and the
 924    file wasn't terminated by a newline, supply one.  Set up BUF's line
 925    table too.  FILE is the name of the file corresponding to FP.
 926    Return nonzero if some input was read.  */
 927
 928 static int
 929 fillbuf (struct buffer *buf, register FILE *fp, char const *file)
 930 {
 931   struct keyfield const *key = keylist;
 932   char eol = eolchar;
 933   size_t line_bytes = buf->line_bytes;
 934   size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
 935
 936   if (buf->eof)
 937     return 0;
 938
 939   if (buf->used != buf->left)
 940     {
 941       memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
 942       buf->used = buf->left;
 943       buf->nlines = 0;
 944     }
 945
 946   for (;;)
 947     {
 948       char *ptr = buf->buf + buf->used;
 949       struct line *linelim = buffer_linelim (buf);
 950       struct line *line = linelim - buf->nlines;
 951       size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
 952       char *line_start = buf->nlines ? line->text + line->length : buf->buf;
 953
 954       while (line_bytes + 1 < avail)
 955         {
 956           /* Read as many bytes as possible, but do not read so many
 957              bytes that there might not be enough room for the
 958              corresponding line array.  The worst case is when the
 959              rest of the input file consists entirely of newlines,
 960              except that the last byte is not a newline.  */
 961           size_t readsize = (avail - 1) / (line_bytes + 1);
 962           size_t bytes_read = fread (ptr, 1, readsize, fp);
 963           char *ptrlim = ptr + bytes_read;
 964           char *p;
 965           avail -= bytes_read;
 966
 967           if (bytes_read != readsize)
 968             {
 969               if (ferror (fp))
 970                 die (_("read failed"), file);
 971               if (feof (fp))
 972                 {
 973                   buf->eof = 1;
 974                   if (buf->buf == ptrlim)
 975                     return 0;
 976                   if (ptrlim[-1] != eol)
 977                     *ptrlim++ = eol;
 978                 }
 979             }
 980
 981           /* Find and record each line in the just-read input.  */
 982           while ((p = memchr (ptr, eol, ptrlim - ptr)))
 983             {
 984               ptr = p + 1;
 985               line--;
 986               line->text = line_start;
 987               line->length = ptr - line_start;
 988               mergesize = MAX (mergesize, line->length);
 989               avail -= line_bytes;
 990
 991               if (key)
 992                 {
 993                   /* Precompute the position of the first key for
 994                      efficiency. */
 995                   line->keylim = key->eword == -1 ? p : limfield (line, key);
 996
 997                   if (key->sword != -1)
 998                     line->keybeg = begfield (line, key);
 999                   else
1000                     {
1001                       if (key->skipsblanks)
1002                         while (blanks[UCHAR (*line_start)])
1003                           line_start++;
1004                       line->keybeg = line_start;
1005                     }
1006                   if (key->skipeblanks)
1007                     trim_trailing_blanks (line->keybeg, &line->keylim);
1008                 }
1009
1010               line_start = ptr;
1011             }
1012
1013           ptr = ptrlim;
1014           if (buf->eof)
1015             break;
1016         }
1017
1018       buf->used = ptr - buf->buf;
1019       buf->nlines = buffer_linelim (buf) - line;
1020       if (buf->nlines != 0)
1021         {
1022           buf->left = ptr - line_start;
1023           merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1024           return 1;
1025         }
1026
1027       /* The current input line is too long to fit in the buffer.
1028          Double the buffer size and try again.  */
1029       if (2 * buf->alloc < buf->alloc)
1030         xalloc_die ();
1031       buf->alloc *= 2;
1032       buf->buf = xrealloc (buf->buf, buf->alloc);
1033     }
1034 }
1035
1036 /* Compare strings A and B containing decimal fractions < 1.  Each string
1037    should begin with a decimal point followed immediately by the digits
1038    of the fraction.  Strings not of this form are considered to be zero. */
1039
1040 /* The goal here, is to take two numbers a and b... compare these
1041    in parallel.  Instead of converting each, and then comparing the
1042    outcome.  Most likely stopping the comparison before the conversion
1043    is complete.  The algorithm used, in the old sort:
1044
1045    Algorithm: fraccompare
1046    Action   : compare two decimal fractions
1047    accepts  : char *a, char *b
1048    returns  : -1 if a<b, 0 if a=b, 1 if a>b.
1049    implement:
1050
1051    if *a == decimal_point AND *b == decimal_point
1052      find first character different in a and b.
1053      if both are digits, return the difference *a - *b.
1054      if *a is a digit
1055        skip past zeros
1056        if digit return 1, else 0
1057      if *b is a digit
1058        skip past zeros
1059        if digit return -1, else 0
1060    if *a is a decimal_point
1061      skip past decimal_point and zeros
1062      if digit return 1, else 0
1063    if *b is a decimal_point
1064      skip past decimal_point and zeros
1065      if digit return -1, else 0
1066    return 0 */
1067
1068 static int
1069 fraccompare (register const char *a, register const char *b)
1070 {
1071   if (*a == decimal_point && *b == decimal_point)
1072     {
1073       while (*++a == *++b)
1074         if (! ISDIGIT (*a))
1075           return 0;
1076       if (ISDIGIT (*a) && ISDIGIT (*b))
1077         return *a - *b;
1078       if (ISDIGIT (*a))
1079         goto a_trailing_nonzero;
1080       if (ISDIGIT (*b))
1081         goto b_trailing_nonzero;
1082       return 0;
1083     }
1084   else if (*a++ == decimal_point)
1085     {
1086     a_trailing_nonzero:
1087       while (*a == NUMERIC_ZERO)
1088         a++;
1089       return ISDIGIT (*a);
1090     }
1091   else if (*b++ == decimal_point)
1092     {
1093     b_trailing_nonzero:
1094       while (*b == NUMERIC_ZERO)
1095         b++;
1096       return - ISDIGIT (*b);
1097     }
1098   return 0;
1099 }
1100
1101 /* Compare strings A and B as numbers without explicitly converting them to
1102    machine numbers.  Comparatively slow for short strings, but asymptotically
1103    hideously fast. */
1104
1105 static int
1106 numcompare (register const char *a, register const char *b)
1107 {
1108   register int tmpa, tmpb, tmp;
1109   register size_t loga, logb;
1110
1111   tmpa = *a;
1112   tmpb = *b;
1113
1114   while (blanks[UCHAR (tmpa)])
1115     tmpa = *++a;
1116   while (blanks[UCHAR (tmpb)])
1117     tmpb = *++b;
1118
1119   if (tmpa == NEGATION_SIGN)
1120     {
1121       do
1122         tmpa = *++a;
1123       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa));
1124       if (tmpb != NEGATION_SIGN)
1125         {
1126           if (tmpa == decimal_point)
1127             do
1128               tmpa = *++a;
1129             while (tmpa == NUMERIC_ZERO);
1130           if (ISDIGIT (tmpa))
1131             return -1;
1132           while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1133             tmpb = *++b;
1134           if (tmpb == decimal_point)
1135             do
1136               tmpb = *++b;
1137             while (tmpb == NUMERIC_ZERO);
1138           if (ISDIGIT (tmpb))
1139             return -1;
1140           return 0;
1141         }
1142       do
1143         tmpb = *++b;
1144       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1145
1146       while (tmpa == tmpb && ISDIGIT (tmpa))
1147         {
1148           do
1149             tmpa = *++a;
1150           while (IS_THOUSANDS_SEP (tmpa));
1151           do
1152             tmpb = *++b;
1153           while (IS_THOUSANDS_SEP (tmpb));
1154         }
1155
1156       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1157           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1158         return -fraccompare (a, b);
1159
1160       tmp = tmpb - tmpa;
1161
1162       for (loga = 0; ISDIGIT (tmpa); ++loga)
1163         do
1164           tmpa = *++a;
1165         while (IS_THOUSANDS_SEP (tmpa));
1166
1167       for (logb = 0; ISDIGIT (tmpb); ++logb)
1168         do
1169           tmpb = *++b;
1170         while (IS_THOUSANDS_SEP (tmpb));
1171
1172       if (loga != logb)
1173         return loga < logb ? 1 : -1;
1174
1175       if (!loga)
1176         return 0;
1177
1178       return tmp;
1179     }
1180   else if (tmpb == NEGATION_SIGN)
1181     {
1182       do
1183         tmpb = *++b;
1184       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1185       if (tmpb == decimal_point)
1186         do
1187           tmpb = *++b;
1188         while (tmpb == NUMERIC_ZERO);
1189       if (ISDIGIT (tmpb))
1190         return 1;
1191       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1192         tmpa = *++a;
1193       if (tmpa == decimal_point)
1194         do
1195           tmpa = *++a;
1196         while (tmpa == NUMERIC_ZERO);
1197       if (ISDIGIT (tmpa))
1198         return 1;
1199       return 0;
1200     }
1201   else
1202     {
1203       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1204         tmpa = *++a;
1205       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1206         tmpb = *++b;
1207
1208       while (tmpa == tmpb && ISDIGIT (tmpa))
1209         {
1210           do
1211             tmpa = *++a;
1212           while (IS_THOUSANDS_SEP (tmpa));
1213           do
1214             tmpb = *++b;
1215           while (IS_THOUSANDS_SEP (tmpb));
1216         }
1217
1218       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1219           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1220         return fraccompare (a, b);
1221
1222       tmp = tmpa - tmpb;
1223
1224       for (loga = 0; ISDIGIT (tmpa); ++loga)
1225         do
1226           tmpa = *++a;
1227         while (IS_THOUSANDS_SEP (tmpa));
1228
1229       for (logb = 0; ISDIGIT (tmpb); ++logb)
1230         do
1231           tmpb = *++b;
1232         while (IS_THOUSANDS_SEP (tmpb));
1233
1234       if (loga != logb)
1235         return loga < logb ? -1 : 1;
1236
1237       if (!loga)
1238         return 0;
1239
1240       return tmp;
1241     }
1242 }
1243
1244 static int
1245 general_numcompare (const char *sa, const char *sb)
1246 {
1247   /* FIXME: add option to warn about failed conversions.  */
1248   /* FIXME: maybe add option to try expensive FP conversion
1249      only if A and B can't be compared more cheaply/accurately.  */
1250
1251   char *ea;
1252   char *eb;
1253   double a = strtod (sa, &ea);
1254   double b = strtod (sb, &eb);
1255
1256   /* Put conversion errors at the start of the collating sequence.  */
1257   if (sa == ea)
1258     return sb == eb ? 0 : -1;
1259   if (sb == eb)
1260     return 1;
1261
1262   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1263      conversion errors but before numbers; sort them by internal
1264      bit-pattern, for lack of a more portable alternative.  */
1265   return (a < b ? -1
1266           : a > b ? 1
1267           : a == b ? 0
1268           : b == b ? -1
1269           : a == a ? 1
1270           : memcmp ((char *) &a, (char *) &b, sizeof a));
1271 }
1272
1273 /* Return an integer in 1..12 of the month name S with length LEN.
1274    Return 0 if the name in S is not recognized.  */
1275
1276 static int
1277 getmonth (const char *s, size_t len)
1278 {
1279   char *month;
1280   register size_t i;
1281   register int lo = 0, hi = MONTHS_PER_YEAR, result;
1282
1283   while (len > 0 && blanks[UCHAR (*s)])
1284     {
1285       ++s;
1286       --len;
1287     }
1288
1289   if (len == 0)
1290     return 0;
1291
1292   month = (char *) alloca (len + 1);
1293   for (i = 0; i < len; ++i)
1294     month[i] = fold_toupper[UCHAR (s[i])];
1295   while (blanks[UCHAR (month[i - 1])])
1296     --i;
1297   month[i] = '\0';
1298
1299   do
1300     {
1301       int ix = (lo + hi) / 2;
1302
1303       if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
1304         hi = ix;
1305       else
1306         lo = ix;
1307     }
1308   while (hi - lo > 1);
1309
1310   result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
1311             ? monthtab[lo].val : 0);
1312
1313   return result;
1314 }
1315
1316 /* Compare two lines A and B trying every key in sequence until there
1317    are no more keys or a difference is found. */
1318
1319 static int
1320 keycompare (const struct line *a, const struct line *b)
1321 {
1322   struct keyfield *key = keylist;
1323
1324   /* For the first iteration only, the key positions have been
1325      precomputed for us. */
1326   register char *texta = a->keybeg;
1327   register char *textb = b->keybeg;
1328   register char *lima = a->keylim;
1329   register char *limb = b->keylim;
1330
1331   int diff;
1332
1333   for (;;)
1334     {
1335       register unsigned char *translate = (unsigned char *) key->translate;
1336       register int *ignore = key->ignore;
1337
1338       /* Find the lengths. */
1339       size_t lena = lima <= texta ? 0 : lima - texta;
1340       size_t lenb = limb <= textb ? 0 : limb - textb;
1341
1342       if (key->skipeblanks)
1343         {
1344           char *a_end = texta + lena;
1345           char *b_end = textb + lenb;
1346           trim_trailing_blanks (texta, &a_end);
1347           trim_trailing_blanks (textb, &b_end);
1348           lena = a_end - texta;
1349           lenb = b_end - textb;
1350         }
1351
1352       /* Actually compare the fields. */
1353       if (key->numeric | key->general_numeric)
1354         {
1355           char savea = *lima, saveb = *limb;
1356
1357           *lima = *limb = '\0';
1358           diff = ((key->numeric ? numcompare : general_numcompare)
1359                   (texta, textb));
1360           *lima = savea, *limb = saveb;
1361         }
1362       else if (key->month)
1363         diff = getmonth (texta, lena) - getmonth (textb, lenb);
1364       /* Sorting like this may become slow, so in a simple locale the user
1365          can select a faster sort that is similar to ascii sort  */
1366       else if (HAVE_SETLOCALE && hard_LC_COLLATE)
1367         {
1368           if (ignore || translate)
1369             {
1370               char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
1371               char *copy_b = copy_a + lena + 1;
1372               size_t new_len_a, new_len_b, i;
1373
1374               /* Ignore and/or translate chars before comparing.  */
1375               for (new_len_a = new_len_b = i = 0; i < max (lena, lenb); i++)
1376                 {
1377                   if (i < lena)
1378                     {
1379                       copy_a[new_len_a] = (translate
1380                                            ? translate[UCHAR (texta[i])]
1381                                            : texta[i]);
1382                       if (!ignore || !ignore[UCHAR (texta[i])])
1383                         ++new_len_a;
1384                     }
1385                   if (i < lenb)
1386                     {
1387                       copy_b[new_len_b] = (translate
1388                                            ? translate[UCHAR (textb[i])]
1389                                            : textb [i]);
1390                       if (!ignore || !ignore[UCHAR (textb[i])])
1391                         ++new_len_b;
1392                     }
1393                 }
1394
1395               diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1396             }
1397           else if (lena == 0)
1398             diff = - NONZERO (lenb);
1399           else if (lenb == 0)
1400             goto greater;
1401           else
1402             diff = xmemcoll (texta, lena, textb, lenb);
1403         }
1404       else if (ignore)
1405         {
1406 #define CMP_WITH_IGNORE(A, B)                                           \
1407   do                                                                    \
1408     {                                                                   \
1409           for (;;)                                                      \
1410             {                                                           \
1411               while (texta < lima && ignore[UCHAR (*texta)])            \
1412                 ++texta;                                                \
1413               while (textb < limb && ignore[UCHAR (*textb)])            \
1414                 ++textb;                                                \
1415               if (! (texta < lima && textb < limb))                     \
1416                 break;                                                  \
1417               diff = UCHAR (A) - UCHAR (B);                             \
1418               if (diff)                                                 \
1419                 goto not_equal;                                         \
1420               ++texta;                                                  \
1421               ++textb;                                                  \
1422             }                                                           \
1423                                                                         \
1424           diff = (texta < lima) - (textb < limb);                       \
1425     }                                                                   \
1426   while (0)
1427
1428           if (translate)
1429             CMP_WITH_IGNORE (translate[UCHAR (*texta)],
1430                              translate[UCHAR (*textb)]);
1431           else
1432             CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1433         }
1434       else if (lena == 0)
1435         diff = - NONZERO (lenb);
1436       else if (lenb == 0)
1437         goto greater;
1438       else
1439         {
1440           if (translate)
1441             {
1442               while (texta < lima && textb < limb)
1443                 {
1444                   diff = (UCHAR (translate[UCHAR (*texta++)])
1445                           - UCHAR (translate[UCHAR (*textb++)]));
1446                   if (diff)
1447                     goto not_equal;
1448                 }
1449             }
1450           else
1451             {
1452               diff = memcmp (texta, textb, min (lena, lenb));
1453               if (diff)
1454                 goto not_equal;
1455             }
1456           diff = lena < lenb ? -1 : lena != lenb;
1457         }
1458
1459       if (diff)
1460         goto not_equal;
1461
1462       key = key->next;
1463       if (! key)
1464         break;
1465
1466       /* Find the beginning and limit of the next field.  */
1467       if (key->eword != -1)
1468         lima = limfield (a, key), limb = limfield (b, key);
1469       else
1470         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1471
1472       if (key->sword != -1)
1473         texta = begfield (a, key), textb = begfield (b, key);
1474       else
1475         {
1476           texta = a->text, textb = b->text;
1477           if (key->skipsblanks)
1478             {
1479               while (texta < lima && blanks[UCHAR (*texta)])
1480                 ++texta;
1481               while (textb < limb && blanks[UCHAR (*textb)])
1482                 ++textb;
1483             }
1484         }
1485     }
1486
1487   return 0;
1488
1489  greater:
1490   diff = 1;
1491  not_equal:
1492   return key->reverse ? -diff : diff;
1493 }
1494
1495 /* Compare two lines A and B, returning negative, zero, or positive
1496    depending on whether A compares less than, equal to, or greater than B. */
1497
1498 static int
1499 compare (register const struct line *a, register const struct line *b)
1500 {
1501   int diff;
1502   size_t alen, blen;
1503
1504   /* First try to compare on the specified keys (if any).
1505      The only two cases with no key at all are unadorned sort,
1506      and unadorned sort -r. */
1507   if (keylist)
1508     {
1509       diff = keycompare (a, b);
1510       alloca (0);
1511       if (diff != 0 || unique || stable)
1512         return diff;
1513     }
1514
1515   /* If the keys all compare equal (or no keys were specified)
1516      fall through to the default comparison.  */
1517   alen = a->length - 1, blen = b->length - 1;
1518
1519   if (alen == 0)
1520     diff = - NONZERO (blen);
1521   else if (blen == 0)
1522     diff = NONZERO (alen);
1523   else if (HAVE_SETLOCALE && hard_LC_COLLATE)
1524     diff = xmemcoll (a->text, alen, b->text, blen);
1525   else if (! (diff = memcmp (a->text, b->text, min (alen, blen))))
1526     diff = alen < blen ? -1 : alen != blen;
1527
1528   return reverse ? -diff : diff;
1529 }
1530
1531 /* Check that the lines read from the given FP come in order.  Print a
1532    diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1533    one if they are not in order.  Otherwise, print no diagnostic
1534    and return zero.  */
1535
1536 static int
1537 checkfp (FILE *fp, char *file_name)
1538 {
1539   struct buffer buf;            /* Input buffer. */
1540   struct line temp;             /* Copy of previous line. */
1541   size_t alloc = 0;
1542   uintmax_t line_number = 0;
1543   struct keyfield *key = keylist;
1544   int nonunique = 1 - unique;
1545   int disordered = 0;
1546
1547   initbuf (&buf, sizeof (struct line),
1548            MAX (merge_buffer_size, sort_size));
1549   temp.text = NULL;
1550
1551   while (fillbuf (&buf, fp, file_name))
1552     {
1553       struct line const *line = buffer_linelim (&buf);
1554       struct line const *linebase = line - buf.nlines;
1555
1556       /* Make sure the line saved from the old buffer contents is
1557          less than or equal to the first line of the new buffer. */
1558       if (alloc && nonunique <= compare (&temp, line - 1))
1559         {
1560         found_disorder:
1561           {
1562             char hr_buf[LONGEST_HUMAN_READABLE + 1];
1563             struct line const *disorder_line = line - 1;
1564             uintmax_t disorder_line_number =
1565               buffer_linelim (&buf) - disorder_line + line_number;
1566             fprintf (stderr, _("%s: %s:%s: disorder: "),
1567                      program_name, file_name,
1568                      human_readable (disorder_line_number, hr_buf, 1, 1));
1569             write_bytes (disorder_line->text, disorder_line->length, stderr,
1570                          _("standard error"));
1571             disordered = 1;
1572             break;
1573           }
1574         }
1575
1576       /* Compare each line in the buffer with its successor.  */
1577       while (linebase < --line)
1578         if (nonunique <= compare (line, line - 1))
1579           goto found_disorder;
1580
1581       line_number += buf.nlines;
1582
1583       /* Save the last line of the buffer.  */
1584       if (alloc < line->length)
1585         {
1586           do
1587             {
1588               alloc *= 2;
1589               if (! alloc)
1590                 {
1591                   alloc = line->length;
1592                   break;
1593                 }
1594             }
1595           while (alloc < line->length);
1596
1597           temp.text = xrealloc (temp.text, alloc);
1598         }
1599       memcpy (temp.text, line->text, line->length);
1600       temp.length = line->length;
1601       if (key)
1602         {
1603           temp.keybeg = temp.text + (line->keybeg - line->text);
1604           temp.keylim = temp.text + (line->keylim - line->text);
1605         }
1606     }
1607
1608   xfclose (fp, file_name);
1609   free (buf.buf);
1610   if (temp.text)
1611     free (temp.text);
1612   return disordered;
1613 }
1614
1615 /* Merge lines from FILES onto OFP.  NFILES cannot be greater than
1616    NMERGE.  Close input and output files before returning.
1617    OUTPUT_FILE gives the name of the output file; if OFP is NULL, the
1618    output file has not been opened yet.  */
1619
1620 static void
1621 mergefps (char **files, register int nfiles,
1622           FILE *ofp, const char *output_file)
1623 {
1624   FILE *fps[NMERGE];            /* Input streams for each file.  */
1625   struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1626   struct line saved;            /* Saved line storage for unique check. */
1627   struct line const *savedline = NULL;
1628                                 /* &saved if there is a saved line. */
1629   size_t savealloc = 0;         /* Size allocated for the saved line. */
1630   struct line const *cur[NMERGE]; /* Current line in each line table. */
1631   struct line const *base[NMERGE]; /* Base of each line table.  */
1632   int ord[NMERGE];              /* Table representing a permutation of fps,
1633                                    such that cur[ord[0]] is the smallest line
1634                                    and will be next output. */
1635   register int i, j, t;
1636   struct keyfield *key = keylist;
1637   saved.text = NULL;
1638
1639   /* Read initial lines from each input file. */
1640   for (i = 0; i < nfiles; )
1641     {
1642       fps[i] = xfopen (files[i], "r");
1643       initbuf (&buffer[i], sizeof (struct line),
1644                MAX (merge_buffer_size, sort_size / nfiles));
1645       if (fillbuf (&buffer[i], fps[i], files[i]))
1646         {
1647           struct line const *linelim = buffer_linelim (&buffer[i]);
1648           cur[i] = linelim - 1;
1649           base[i] = linelim - buffer[i].nlines;
1650           i++;
1651         }
1652       else
1653         {
1654           /* fps[i] is empty; eliminate it from future consideration.  */
1655           xfclose (fps[i], files[i]);
1656           zaptemp (files[i]);
1657           free (buffer[i].buf);
1658           --nfiles;
1659           for (j = i; j < nfiles; ++j)
1660             files[j] = files[j + 1];
1661         }
1662     }
1663
1664   if (! ofp)
1665     ofp = xfopen (output_file, "w");
1666
1667   /* Set up the ord table according to comparisons among input lines.
1668      Since this only reorders two items if one is strictly greater than
1669      the other, it is stable. */
1670   for (i = 0; i < nfiles; ++i)
1671     ord[i] = i;
1672   for (i = 1; i < nfiles; ++i)
1673     if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
1674       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1675
1676   /* Repeatedly output the smallest line until no input remains. */
1677   while (nfiles)
1678     {
1679       struct line const *smallest = cur[ord[0]];
1680
1681       /* If uniquified output is turned on, output only the first of
1682          an identical series of lines. */
1683       if (unique)
1684         {
1685           if (savedline && compare (savedline, smallest))
1686             {
1687               savedline = 0;
1688               write_bytes (saved.text, saved.length, ofp, output_file);
1689             }
1690           if (!savedline)
1691             {
1692               savedline = &saved;
1693               if (savealloc < smallest->length)
1694                 {
1695                   do
1696                     if (! savealloc)
1697                       {
1698                         savealloc = smallest->length;
1699                         break;
1700                       }
1701                   while ((savealloc *= 2) < smallest->length);
1702
1703                   saved.text = xrealloc (saved.text, savealloc);
1704                 }
1705               saved.length = smallest->length;
1706               memcpy (saved.text, smallest->text, saved.length);
1707               if (key)
1708                 {
1709                   saved.keybeg =
1710                     saved.text + (smallest->keybeg - smallest->text);
1711                   saved.keylim =
1712                     saved.text + (smallest->keylim - smallest->text);
1713                 }
1714             }
1715         }
1716       else
1717         write_bytes (smallest->text, smallest->length, ofp, output_file);
1718
1719       /* Check if we need to read more lines into core. */
1720       if (base[ord[0]] < smallest)
1721         cur[ord[0]] = smallest - 1;
1722       else
1723         {
1724           if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
1725             {
1726               struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
1727               cur[ord[0]] = linelim - 1;
1728               base[ord[0]] = linelim - buffer[ord[0]].nlines;
1729             }
1730           else
1731             {
1732               /* We reached EOF on fps[ord[0]]. */
1733               for (i = 1; i < nfiles; ++i)
1734                 if (ord[i] > ord[0])
1735                   --ord[i];
1736               --nfiles;
1737               xfclose (fps[ord[0]], files[ord[0]]);
1738               zaptemp (files[ord[0]]);
1739               free (buffer[ord[0]].buf);
1740               for (i = ord[0]; i < nfiles; ++i)
1741                 {
1742                   fps[i] = fps[i + 1];
1743                   files[i] = files[i + 1];
1744                   buffer[i] = buffer[i + 1];
1745                   cur[i] = cur[i + 1];
1746                   base[i] = base[i + 1];
1747                 }
1748               for (i = 0; i < nfiles; ++i)
1749                 ord[i] = ord[i + 1];
1750               continue;
1751             }
1752         }
1753
1754       /* The new line just read in may be larger than other lines
1755          already in core; push it back in the queue until we encounter
1756          a line larger than it. */
1757       for (i = 1; i < nfiles; ++i)
1758         {
1759           t = compare (cur[ord[0]], cur[ord[i]]);
1760           if (!t)
1761             t = ord[0] - ord[i];
1762           if (t < 0)
1763             break;
1764         }
1765       t = ord[0];
1766       for (j = 1; j < i; ++j)
1767         ord[j - 1] = ord[j];
1768       ord[i - 1] = t;
1769     }
1770
1771   if (unique && savedline)
1772     {
1773       write_bytes (saved.text, saved.length, ofp, output_file);
1774       free (saved.text);
1775     }
1776
1777   xfclose (ofp, output_file);
1778 }
1779
1780 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
1781    The input and output arrays are in reverse order, and LINES and
1782    TEMP point just past the end of their respective arrays.  */
1783
1784 static void
1785 sortlines (struct line *lines, size_t nlines, struct line *temp)
1786 {
1787   register struct line *lo, *hi, *t;
1788   register size_t nlo, nhi;
1789
1790   if (nlines == 2)
1791     {
1792       if (0 < compare (&lines[-1], &lines[-2]))
1793         {
1794           struct line tmp = lines[-1];
1795           lines[-1] = lines[-2];
1796           lines[-2] = tmp;
1797         }
1798       return;
1799     }
1800
1801   nlo = nlines / 2;
1802   lo = lines;
1803   nhi = nlines - nlo;
1804   hi = lines - nlo;
1805
1806   if (nlo > 1)
1807     sortlines (lo, nlo, temp);
1808
1809   if (nhi > 1)
1810     sortlines (hi, nhi, temp);
1811
1812   t = temp;
1813
1814   while (nlo && nhi)
1815     if (compare (lo - 1, hi - 1) <= 0)
1816       *--t = *--lo, --nlo;
1817     else
1818       *--t = *--hi, --nhi;
1819   while (nlo--)
1820     *--t = *--lo;
1821
1822   for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1823     *--lo = *--t;
1824 }
1825
1826 /* Return the index of the first of NFILES FILES that is the same file
1827    as OUTFILE.  If none can be the same, return NFILES.  Consider an
1828    input pipe to be the same as OUTFILE, since the pipe might be the
1829    output of a command like "cat OUTFILE".  */
1830
1831 static int
1832 first_same_file (char **files, int nfiles, char const *outfile)
1833 {
1834   int i;
1835   int got_outstat = 0;
1836   struct stat instat, outstat;
1837
1838   for (i = 0; i < nfiles; i++)
1839     {
1840       int standard_input = STREQ (files[i], "-");
1841
1842       if (STREQ (outfile, files[i]) && ! standard_input)
1843         return i;
1844
1845       if (! got_outstat)
1846         {
1847           got_outstat = 1;
1848           if ((STREQ (outfile, "-")
1849                ? fstat (STDOUT_FILENO, &outstat)
1850                : stat (outfile, &outstat))
1851               != 0)
1852             return nfiles;
1853         }
1854
1855       if (((standard_input
1856             ? fstat (STDIN_FILENO, &instat)
1857             : stat (files[i], &instat))
1858            == 0)
1859           && (S_ISFIFO (instat.st_mode) || SAME_INODE (instat, outstat)))
1860         return i;
1861     }
1862
1863   return nfiles;
1864 }
1865
1866 /* Check that each of the NFILES FILES is ordered.
1867    Return a count of disordered files. */
1868
1869 static int
1870 check (char **files, int nfiles)
1871 {
1872   int i, disorders = 0;
1873   FILE *fp;
1874
1875   for (i = 0; i < nfiles; ++i)
1876     {
1877       fp = xfopen (files[i], "r");
1878       disorders += checkfp (fp, files[i]);
1879     }
1880   return disorders;
1881 }
1882
1883 /* Merge NFILES FILES onto OUTPUT_FILE.  However, merge at most
1884    MAX_MERGE input files directly onto OUTPUT_FILE.  MAX_MERGE cannot
1885    exceed NMERGE.  */
1886
1887 static void
1888 merge (char **files, int nfiles, int max_merge, char const *output_file)
1889 {
1890   while (max_merge < nfiles)
1891     {
1892       FILE *tfp;
1893       int i, t = 0;
1894       char *temp;
1895       for (i = 0; i < nfiles / NMERGE; ++i)
1896         {
1897           temp = create_temp_file (&tfp);
1898           mergefps (&files[i * NMERGE], NMERGE, tfp, temp);
1899           files[t++] = temp;
1900         }
1901       temp = create_temp_file (&tfp);
1902       mergefps (&files[i * NMERGE], nfiles % NMERGE, tfp, temp);
1903       files[t++] = temp;
1904       nfiles = t;
1905       if (nfiles == 1)
1906         break;
1907     }
1908
1909   mergefps (files, nfiles, NULL, output_file);
1910 }
1911
1912 /* Sort NFILES FILES onto OUTPUT_FILE. */
1913
1914 static void
1915 sort (char **files, int nfiles, char const *output_file)
1916 {
1917   struct buffer buf;
1918   int n_temp_files = 0;
1919   int output_file_created = 0;
1920
1921   static size_t size;
1922   if (! size && ! (size = sort_size))
1923     size = default_sort_size ();
1924
1925   buf.alloc = 0;
1926
1927   while (nfiles)
1928     {
1929       char const *temp_output;
1930       char const *file = *files;
1931       FILE *fp = xfopen (file, "r");
1932       FILE *tfp;
1933
1934       if (! buf.alloc)
1935         initbuf (&buf, 2 * sizeof (struct line),
1936                  sort_buffer_size (&fp, 1, files, nfiles,
1937                                    2 * sizeof (struct line), size));
1938       buf.eof = 0;
1939       files++;
1940       nfiles--;
1941
1942       while (fillbuf (&buf, fp, file))
1943         {
1944           struct line *line;
1945           struct line *linebase;
1946
1947           if (buf.eof && nfiles
1948               && (2 * sizeof (struct line) + 1
1949                   < (buf.alloc - buf.used
1950                      - 2 * sizeof (struct line) * buf.nlines)))
1951             {
1952               /* End of file, but there is more input and buffer room.
1953                  Concatenate the next input file; this is faster in
1954                  the usual case.  */
1955               buf.left = buf.used;
1956               break;
1957             }
1958
1959           line = buffer_linelim (&buf);
1960           linebase = line - buf.nlines;
1961           sortlines (line, buf.nlines, linebase);
1962           if (buf.eof && !nfiles && !n_temp_files && !buf.left)
1963             {
1964               xfclose (fp, file);
1965               tfp = xfopen (output_file, "w");
1966               temp_output = output_file;
1967               output_file_created = 1;
1968             }
1969           else
1970             {
1971               ++n_temp_files;
1972               temp_output = create_temp_file (&tfp);
1973             }
1974
1975           do
1976             {
1977               line--;
1978               write_bytes (line->text, line->length, tfp, temp_output);
1979               if (unique)
1980                 while (linebase < line && compare (line, line - 1) == 0)
1981                   line--;
1982             }
1983           while (linebase < line);
1984
1985           xfclose (tfp, temp_output);
1986
1987           if (output_file_created)
1988             goto finish;
1989         }
1990       xfclose (fp, file);
1991     }
1992
1993  finish:
1994   free (buf.buf);
1995
1996   if (! output_file_created)
1997     {
1998       int i = n_temp_files;
1999       struct tempnode *node;
2000       char **tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
2001       for (node = temphead; i > 0; node = node->next)
2002         tempfiles[--i] = node->name;
2003       merge (tempfiles, n_temp_files, NMERGE, output_file);
2004       free ((char *) tempfiles);
2005     }
2006 }
2007
2008 /* Insert key KEY at the end of the key list.  */
2009
2010 static void
2011 insertkey (struct keyfield *key)
2012 {
2013   struct keyfield **p;
2014
2015   for (p = &keylist; *p; p = &(*p)->next)
2016     continue;
2017   *p = key;
2018   key->next = NULL;
2019 }
2020
2021 /* Report a bad field specification SPEC, with extra info MSGID.  */
2022
2023 static void badfieldspec PARAMS ((char const *, char const *))
2024      ATTRIBUTE_NORETURN;
2025 static void
2026 badfieldspec (char const *spec, char const *msgid)
2027 {
2028   error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"),
2029          _(msgid), spec);
2030   abort ();
2031 }
2032
2033 /* Parse the leading integer in STRING and store the resulting value
2034    (which must fit into size_t) into *VAL.  Return the address of the
2035    suffix after the integer.  If MSGID is NULL, return NULL after
2036    failure; otherwise, report MSGID and exit on failure.  */
2037
2038 static char const *
2039 parse_field_count (char const *string, size_t *val, char const *msgid)
2040 {
2041   char *suffix;
2042   uintmax_t n;
2043
2044   switch (xstrtoumax (string, &suffix, 10, &n, ""))
2045     {
2046     case LONGINT_OK:
2047     case LONGINT_INVALID_SUFFIX_CHAR:
2048       *val = n;
2049       if (*val == n)
2050         break;
2051       /* Fall through.  */
2052     case LONGINT_OVERFLOW:
2053       if (msgid)
2054         error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2055                _(msgid), (int) (suffix - string), string);
2056       return NULL;
2057
2058     case LONGINT_INVALID:
2059       if (msgid)
2060         error (SORT_FAILURE, 0, _("%s: invalid count at start of `%s'"),
2061                _(msgid), string);
2062       return NULL;
2063     }
2064
2065   return suffix;
2066 }
2067
2068 /* Handle interrupts and hangups. */
2069
2070 static void
2071 sighandler (int sig)
2072 {
2073 #ifndef SA_NOCLDSTOP
2074   signal (sig, SIG_IGN);
2075 #endif
2076
2077   cleanup ();
2078
2079 #ifdef SA_NOCLDSTOP
2080   {
2081     struct sigaction sigact;
2082
2083     sigact.sa_handler = SIG_DFL;
2084     sigemptyset (&sigact.sa_mask);
2085     sigact.sa_flags = 0;
2086     sigaction (sig, &sigact, NULL);
2087   }
2088 #else
2089   signal (sig, SIG_DFL);
2090 #endif
2091
2092   kill (process_id, sig);
2093 }
2094
2095 /* Set the ordering options for KEY specified in S.
2096    Return the address of the first character in S that
2097    is not a valid ordering option.
2098    BLANKTYPE is the kind of blanks that 'b' should skip. */
2099
2100 static char *
2101 set_ordering (register const char *s, struct keyfield *key,
2102               enum blanktype blanktype)
2103 {
2104   while (*s)
2105     {
2106       switch (*s)
2107         {
2108         case 'b':
2109           if (blanktype == bl_start || blanktype == bl_both)
2110             key->skipsblanks = 1;
2111           if (blanktype == bl_end || blanktype == bl_both)
2112             key->skipeblanks = 1;
2113           break;
2114         case 'd':
2115           key->ignore = nondictionary;
2116           break;
2117         case 'f':
2118           key->translate = fold_toupper;
2119           break;
2120         case 'g':
2121           key->general_numeric = 1;
2122           break;
2123         case 'i':
2124           key->ignore = nonprinting;
2125           break;
2126         case 'M':
2127           key->month = 1;
2128           break;
2129         case 'n':
2130           key->numeric = 1;
2131           break;
2132         case 'r':
2133           key->reverse = 1;
2134           break;
2135         default:
2136           return (char *) s;
2137         }
2138       ++s;
2139     }
2140   return (char *) s;
2141 }
2142
2143 static struct keyfield *
2144 new_key (void)
2145 {
2146   struct keyfield *key = (struct keyfield *) xcalloc (1, sizeof *key);
2147   key->eword = -1;
2148   return key;
2149 }
2150
2151 int
2152 main (int argc, char **argv)
2153 {
2154   struct keyfield *key;
2155   struct keyfield gkey;
2156   char const *s;
2157   int i;
2158   int c = 0;
2159   int checkonly = 0, mergeonly = 0, nfiles = 0;
2160   int posix_pedantic = (getenv ("POSIXLY_CORRECT") != NULL);
2161   bool obsolete_usage = (posix2_version () < 200112);
2162   char const *short_options = (obsolete_usage
2163                                ? COMMON_SHORT_OPTIONS "y::"
2164                                : COMMON_SHORT_OPTIONS "y:");
2165   char *minus = "-", **files;
2166   char const *outfile = minus;
2167   static int const sigs[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2168   int nsigs = sizeof sigs / sizeof *sigs;
2169 #ifdef SA_NOCLDSTOP
2170   struct sigaction oldact, newact;
2171 #endif
2172
2173   program_name = argv[0];
2174   process_id = getpid ();
2175   setlocale (LC_ALL, "");
2176   bindtextdomain (PACKAGE, LOCALEDIR);
2177   textdomain (PACKAGE);
2178
2179   atexit (cleanup);
2180
2181   hard_LC_COLLATE = hard_locale (LC_COLLATE);
2182 #if HAVE_NL_LANGINFO
2183   hard_LC_TIME = hard_locale (LC_TIME);
2184 #endif
2185
2186 #if HAVE_SETLOCALE
2187   /* Let's get locale's representation of the decimal point */
2188   {
2189     struct lconv *lconvp = localeconv ();
2190
2191     /* If the locale doesn't define a decimal point, or if the decimal
2192        point is multibyte, use the C decimal point.  We don't support
2193        multibyte decimal points yet.  */
2194     decimal_point = *lconvp->decimal_point;
2195     if (! decimal_point || lconvp->decimal_point[1])
2196       decimal_point = C_DECIMAL_POINT;
2197
2198     /* We don't support multibyte thousands separators yet.  */
2199     th_sep = *lconvp->thousands_sep;
2200     if (! th_sep || lconvp->thousands_sep[1])
2201       th_sep = CHAR_MAX + 1;
2202   }
2203 #endif
2204
2205   have_read_stdin = 0;
2206   inittables ();
2207
2208   /* Change the way library functions fail.  */
2209   xalloc_exit_failure = SORT_FAILURE;
2210   xmemcoll_exit_failure = SORT_FAILURE;
2211
2212 #ifdef SA_NOCLDSTOP
2213   sigemptyset (&caught_signals);
2214   for (i = 0; i < nsigs; i++)
2215     sigaddset (&caught_signals, sigs[i]);
2216   newact.sa_handler = sighandler;
2217   newact.sa_mask = caught_signals;
2218   newact.sa_flags = 0;
2219 #endif
2220
2221   for (i = 0; i < nsigs; i++)
2222     {
2223       int sig = sigs[i];
2224 #ifdef SA_NOCLDSTOP
2225       sigaction (sig, NULL, &oldact);
2226       if (oldact.sa_handler != SIG_IGN)
2227         sigaction (sig, &newact, NULL);
2228 #else
2229       if (signal (sig, SIG_IGN) != SIG_IGN)
2230         signal (sig, sighandler);
2231 #endif
2232     }
2233
2234   gkey.sword = gkey.eword = -1;
2235   gkey.ignore = NULL;
2236   gkey.translate = NULL;
2237   gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
2238   gkey.skipsblanks = gkey.skipeblanks = 0;
2239
2240   files = (char **) xmalloc (sizeof (char *) * argc);
2241
2242   for (;;)
2243     {
2244       /* Parse an operand as a file after "--" was seen; or if
2245          pedantic and a file was seen, unless the POSIX version
2246          predates 1003.1-2001 and -c was not seen and the operand is
2247          "-o FILE" or "-oFILE".  */
2248
2249       if (c == -1
2250           || (posix_pedantic && nfiles != 0
2251               && ! (obsolete_usage
2252                     && ! checkonly
2253                     && optind != argc
2254                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
2255                     && (argv[optind][2] || optind + 1 != argc)))
2256           || ((c = getopt_long (argc, argv, short_options,
2257                                 long_options, NULL))
2258               == -1))
2259         {
2260           if (optind == argc)
2261             break;
2262           files[nfiles++] = argv[optind++];
2263         }
2264       else switch (c)
2265         {
2266         case 1:
2267           key = NULL;
2268           if (obsolete_usage && optarg[0] == '+')
2269             {
2270               /* Treat +POS1 [-POS2] as a key if possible; but silently
2271                  treat an operand as a file if it is not a valid +POS1.  */
2272               key = new_key ();
2273               s = parse_field_count (optarg + 1, &key->sword, NULL);
2274               if (s && *s == '.')
2275                 s = parse_field_count (s + 1, &key->schar, NULL);
2276               if (! (key->sword | key->schar))
2277                 key->sword = -1;
2278               if (! s || *set_ordering (s, key, bl_start))
2279                 {
2280                   free (key);
2281                   key = NULL;
2282                 }
2283               else
2284                 {
2285                   if (optind != argc && argv[optind][0] == '-'
2286                       && ISDIGIT (argv[optind][1]))
2287                     {
2288                       char const *optarg1 = argv[optind++];
2289                       s = parse_field_count (optarg1 + 1, &key->eword,
2290                                              N_("invalid number after `-'"));
2291                       if (*s == '.')
2292                         s = parse_field_count (s + 1, &key->echar,
2293                                                N_("invalid number after `.'"));
2294                       if (*set_ordering (s, key, bl_end))
2295                         badfieldspec (optarg1,
2296                                       N_("stray character in field spec"));
2297                     }
2298                   insertkey (key);
2299                 }
2300             }
2301           if (! key)
2302             files[nfiles++] = optarg;
2303           break;
2304
2305         case 'b':
2306         case 'd':
2307         case 'f':
2308         case 'g':
2309         case 'i':
2310         case 'M':
2311         case 'n':
2312         case 'r':
2313           {
2314             char str[2];
2315             str[0] = c;
2316             str[1] = '\0';
2317             set_ordering (str, &gkey, bl_both);
2318           }
2319           break;
2320
2321         case 'c':
2322           checkonly = 1;
2323           break;
2324
2325         case 'k':
2326           key = new_key ();
2327
2328           /* Get POS1. */
2329           s = parse_field_count (optarg, &key->sword,
2330                                  N_("invalid number at field start"));
2331           if (! key->sword--)
2332             {
2333               /* Provoke with `sort -k0' */
2334               badfieldspec (optarg, N_("field number is zero"));
2335             }
2336           if (*s == '.')
2337             {
2338               s = parse_field_count (s + 1, &key->schar,
2339                                      N_("invalid number after `.'"));
2340               if (! key->schar--)
2341                 {
2342                   /* Provoke with `sort -k1.0' */
2343                   badfieldspec (optarg, N_("character offset is zero"));
2344                 }
2345             }
2346           if (! (key->sword | key->schar))
2347             key->sword = -1;
2348           s = set_ordering (s, key, bl_start);
2349           if (*s != ',')
2350             {
2351               key->eword = -1;
2352               key->echar = 0;
2353             }
2354           else
2355             {
2356               /* Get POS2. */
2357               s = parse_field_count (s + 1, &key->eword,
2358                                      N_("invalid number after `,'"));
2359               if (! key->eword--)
2360                 {
2361                   /* Provoke with `sort -k1,0' */
2362                   badfieldspec (optarg, N_("field number is zero"));
2363                 }
2364               if (*s == '.')
2365                 s = parse_field_count (s + 1, &key->echar,
2366                                        N_("invalid number after `.'"));
2367               else
2368                 {
2369                   /* `-k 2,3' is equivalent to `+1 -3'.  */
2370                   key->eword++;
2371                 }
2372               s = set_ordering (s, key, bl_end);
2373             }
2374           if (*s)
2375             badfieldspec (optarg, N_("stray character in field spec"));
2376           insertkey (key);
2377           break;
2378
2379         case 'm':
2380           mergeonly = 1;
2381           break;
2382
2383         case 'o':
2384           outfile = optarg;
2385           break;
2386
2387         case 's':
2388           stable = 1;
2389           break;
2390
2391         case 'S':
2392           specify_sort_size (optarg);
2393           break;
2394
2395         case 't':
2396           tab = optarg[0];
2397           if (tab && optarg[1])
2398             {
2399               /* Provoke with `sort -txx'.  Complain about
2400                  "multi-character tab" instead of "multibyte tab", so
2401                  that the diagnostic's wording does not need to be
2402                  changed once multibyte characters are supported.  */
2403               error (SORT_FAILURE, 0, _("multi-character tab `%s'"), optarg);
2404             }
2405           break;
2406
2407         case 'T':
2408           add_temp_dir (optarg);
2409           break;
2410
2411         case 'u':
2412           unique = 1;
2413           break;
2414
2415         case 'y':
2416           /* Accept and ignore e.g. -y0 for compatibility with Solaris
2417              2.x through Solaris 7.  -y is marked as obsolete starting
2418              with Solaris 8.  */
2419           break;
2420
2421         case 'z':
2422           eolchar = 0;
2423           break;
2424
2425         case_GETOPT_HELP_CHAR;
2426
2427         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2428
2429         default:
2430           usage (SORT_FAILURE);
2431         }
2432     }
2433
2434   /* Inheritance of global options to individual keys. */
2435   for (key = keylist; key; key = key->next)
2436     if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2437         && !key->skipeblanks && !key->month && !key->numeric
2438         && !key->general_numeric)
2439       {
2440         key->ignore = gkey.ignore;
2441         key->translate = gkey.translate;
2442         key->skipsblanks = gkey.skipsblanks;
2443         key->skipeblanks = gkey.skipeblanks;
2444         key->month = gkey.month;
2445         key->numeric = gkey.numeric;
2446         key->general_numeric = gkey.general_numeric;
2447         key->reverse = gkey.reverse;
2448       }
2449
2450   if (!keylist && (gkey.ignore || gkey.translate || gkey.skipsblanks
2451                    || gkey.skipeblanks || gkey.month || gkey.numeric
2452                    || gkey.general_numeric))
2453     insertkey (&gkey);
2454   reverse = gkey.reverse;
2455
2456   if (temp_dir_count == 0)
2457     {
2458       char const *tmp_dir = getenv ("TMPDIR");
2459       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
2460     }
2461
2462   if (nfiles == 0)
2463     {
2464       nfiles = 1;
2465       files = &minus;
2466     }
2467
2468   if (checkonly)
2469     {
2470       if (nfiles > 1)
2471         error (SORT_FAILURE, 0, _("extra operand `%s' not allowed with -c"),
2472                files[1]);
2473
2474       /* POSIX requires that sort return 1 IFF invoked with -c and the
2475          input is not properly sorted.  */
2476       exit (check (files, nfiles) == 0 ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
2477     }
2478
2479   if (mergeonly)
2480     {
2481       int max_merge = first_same_file (files, MIN (nfiles, NMERGE), outfile);
2482       merge (files, nfiles, max_merge, outfile);
2483     }
2484   else
2485     sort (files, nfiles, outfile);
2486
2487   if (have_read_stdin && fclose (stdin) == EOF)
2488     die (_("close failed"), "-");
2489
2490   exit (EXIT_SUCCESS);
2491 }