.
[coreutils.git] / src / sort.c
blobb67582a038170c2e4d3060dfec818607343a87dd
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 1991-2004 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
24 #include <config.h>
26 #include <getopt.h>
27 #include <sys/types.h>
28 #include <signal.h>
29 #include <stdio.h>
30 #include "system.h"
31 #include "error.h"
32 #include "hard-locale.h"
33 #include "inttostr.h"
34 #include "long-options.h"
35 #include "physmem.h"
36 #include "posixver.h"
37 #include "stdio-safer.h"
38 #include "xmemcoll.h"
39 #include "xstrtol.h"
41 #if HAVE_SYS_RESOURCE_H
42 # include <sys/resource.h>
43 #endif
44 #ifndef RLIMIT_DATA
45 struct rlimit { size_t rlim_cur; };
46 # define getrlimit(Resource, Rlp) (-1)
47 #endif
49 /* The official name of this program (e.g., no `g' prefix). */
50 #define PROGRAM_NAME "sort"
52 #define AUTHORS "Mike Haertel", "Paul Eggert"
54 #if HAVE_LANGINFO_CODESET
55 # include <langinfo.h>
56 #endif
58 #ifndef SA_NOCLDSTOP
59 # define sigprocmask(How, Set, Oset) /* empty */
60 # define sigset_t int
61 #endif
63 #ifndef STDC_HEADERS
64 double strtod ();
65 #endif
67 #define UCHAR_LIM (UCHAR_MAX + 1)
68 #define UCHAR(c) ((unsigned char) (c))
70 #ifndef DEFAULT_TMPDIR
71 # define DEFAULT_TMPDIR "/tmp"
72 #endif
74 /* Exit statuses. */
75 enum
77 /* POSIX says to exit with status 1 if invoked with -c and the
78 input is not properly sorted. */
79 SORT_OUT_OF_ORDER = 1,
81 /* POSIX says any other irregular exit must exit with a status
82 code greater than 1. */
83 SORT_FAILURE = 2
86 #define C_DECIMAL_POINT '.'
87 #define NEGATION_SIGN '-'
88 #define NUMERIC_ZERO '0'
90 #if HAVE_SETLOCALE
92 static char decimal_point;
93 static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
95 /* Nonzero if the corresponding locales are hard. */
96 static bool hard_LC_COLLATE;
97 # if HAVE_NL_LANGINFO
98 static bool hard_LC_TIME;
99 # endif
101 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)
103 #else
105 # define decimal_point C_DECIMAL_POINT
106 # define IS_THOUSANDS_SEP(x) 0
108 #endif
110 #define NONZERO(x) (x != 0)
112 /* The kind of blanks for '-b' to skip in various options. */
113 enum blanktype { bl_start, bl_end, bl_both };
115 /* The character marking end of line. Default to \n. */
116 static char eolchar = '\n';
118 /* Lines are held in core as counted strings. */
119 struct line
121 char *text; /* Text of the line. */
122 size_t length; /* Length including final newline. */
123 char *keybeg; /* Start of first key. */
124 char *keylim; /* Limit of first key. */
127 /* Input buffers. */
128 struct buffer
130 char *buf; /* Dynamically allocated buffer,
131 partitioned into 3 regions:
132 - input data;
133 - unused area;
134 - an array of lines, in reverse order. */
135 size_t used; /* Number of bytes used for input data. */
136 size_t nlines; /* Number of lines in the line array. */
137 size_t alloc; /* Number of bytes allocated. */
138 size_t left; /* Number of bytes left from previous reads. */
139 size_t line_bytes; /* Number of bytes to reserve for each line. */
140 bool eof; /* An EOF has been read. */
143 struct keyfield
145 size_t sword; /* Zero-origin 'word' to start at. */
146 size_t schar; /* Additional characters to skip. */
147 size_t eword; /* Zero-origin first word after field. */
148 size_t echar; /* Additional characters in field. */
149 bool const *ignore; /* Boolean array of characters to ignore. */
150 char const *translate; /* Translation applied to characters. */
151 bool skipsblanks; /* Skip leading blanks at start. */
152 bool skipeblanks; /* Skip trailing blanks at finish. */
153 bool numeric; /* Flag for numeric comparison. Handle
154 strings of digits with optional decimal
155 point, but no exponential notation. */
156 bool general_numeric; /* Flag for general, numeric comparison.
157 Handle numbers in exponential notation. */
158 bool month; /* Flag for comparison by month name. */
159 bool reverse; /* Reverse the sense of comparison. */
160 struct keyfield *next; /* Next keyfield to try. */
163 struct month
165 char const *name;
166 int val;
169 /* The name this program was run with. */
170 char *program_name;
172 /* FIXME: None of these tables work with multibyte character sets.
173 Also, there are many other bugs when handling multibyte characters.
174 One way to fix this is to rewrite `sort' to use wide characters
175 internally, but doing this with good performance is a bit
176 tricky. */
178 /* Table of blanks. */
179 static bool blanks[UCHAR_LIM];
181 /* Table of non-printing characters. */
182 static bool nonprinting[UCHAR_LIM];
184 /* Table of non-dictionary characters (not letters, digits, or blanks). */
185 static bool nondictionary[UCHAR_LIM];
187 /* Translation table folding lower case to upper. */
188 static char fold_toupper[UCHAR_LIM];
190 #define MONTHS_PER_YEAR 12
192 /* Table mapping month names to integers.
193 Alphabetic order allows binary search. */
194 static struct month monthtab[] =
196 {"APR", 4},
197 {"AUG", 8},
198 {"DEC", 12},
199 {"FEB", 2},
200 {"JAN", 1},
201 {"JUL", 7},
202 {"JUN", 6},
203 {"MAR", 3},
204 {"MAY", 5},
205 {"NOV", 11},
206 {"OCT", 10},
207 {"SEP", 9}
210 /* During the merge phase, the number of files to merge at once. */
211 #define NMERGE 16
213 /* Minimum size for a merge or check buffer. */
214 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
216 /* Minimum sort size; the code might not work with smaller sizes. */
217 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
219 /* The number of bytes needed for a merge or check buffer, which can
220 function relatively efficiently even if it holds only one line. If
221 a longer line is seen, this value is increased. */
222 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
224 /* The approximate maximum number of bytes of main memory to use, as
225 specified by the user. Zero if the user has not specified a size. */
226 static size_t sort_size;
228 /* The guessed size for non-regular files. */
229 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
231 /* Array of directory names in which any temporary files are to be created. */
232 static char const **temp_dirs;
234 /* Number of temporary directory names used. */
235 static size_t temp_dir_count;
237 /* Number of allocated slots in temp_dirs. */
238 static size_t temp_dir_alloc;
240 /* Flag to reverse the order of all comparisons. */
241 static bool reverse;
243 /* Flag for stable sort. This turns off the last ditch bytewise
244 comparison of lines, and instead leaves lines in the same order
245 they were read if all keys compare equal. */
246 static bool stable;
248 /* If TAB has this value, blanks separate fields. */
249 enum { TAB_DEFAULT = CHAR_MAX + 1 };
251 /* Tab character separating fields. If TAB_DEFAULT, then fields are
252 separated by the empty string between a non-blank character and a blank
253 character. */
254 static int tab = TAB_DEFAULT;
256 /* Flag to remove consecutive duplicate lines from the output.
257 Only the last of a sequence of equal lines will be output. */
258 static bool unique;
260 /* Nonzero if any of the input files are the standard input. */
261 static bool have_read_stdin;
263 /* List of key field comparisons to be tried. */
264 static struct keyfield *keylist;
266 static void sortlines_temp (struct line *, size_t, struct line *);
268 void
269 usage (int status)
271 if (status != EXIT_SUCCESS)
272 fprintf (stderr, _("Try `%s --help' for more information.\n"),
273 program_name);
274 else
276 printf (_("\
277 Usage: %s [OPTION]... [FILE]...\n\
279 program_name);
280 fputs (_("\
281 Write sorted concatenation of all FILE(s) to standard output.\n\
283 Ordering options:\n\
285 "), stdout);
286 fputs (_("\
287 Mandatory arguments to long options are mandatory for short options too.\n\
288 "), stdout);
289 fputs (_("\
290 -b, --ignore-leading-blanks ignore leading blanks\n\
291 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
292 -f, --ignore-case fold lower case to upper case characters\n\
293 "), stdout);
294 fputs (_("\
295 -g, --general-numeric-sort compare according to general numerical value\n\
296 -i, --ignore-nonprinting consider only printable characters\n\
297 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
298 -n, --numeric-sort compare according to string numerical value\n\
299 -r, --reverse reverse the result of comparisons\n\
301 "), stdout);
302 fputs (_("\
303 Other options:\n\
305 -c, --check check whether input is sorted; do not sort\n\
306 -k, --key=POS1[,POS2] start a key at POS1, end it at POS 2 (origin 1)\n\
307 -m, --merge merge already sorted files; do not sort\n\
308 -o, --output=FILE write result to FILE instead of standard output\n\
309 -s, --stable stabilize sort by disabling last-resort comparison\n\
310 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
311 "), stdout);
312 printf (_("\
313 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
314 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
315 multiple options specify multiple directories\n\
316 -u, --unique with -c, check for strict ordering;\n\
317 without -c, output only the first of an equal run\n\
318 "), DEFAULT_TMPDIR);
319 fputs (_("\
320 -z, --zero-terminated end lines with 0 byte, not newline\n\
321 "), stdout);
322 fputs (HELP_OPTION_DESCRIPTION, stdout);
323 fputs (VERSION_OPTION_DESCRIPTION, stdout);
324 fputs (_("\
326 POS is F[.C][OPTS], where F is the field number and C the character position\n\
327 in the field. OPTS is one or more single-letter ordering options, which\n\
328 override global ordering options for that key. If no key is given, use the\n\
329 entire line as the key.\n\
331 SIZE may be followed by the following multiplicative suffixes:\n\
332 "), stdout);
333 fputs (_("\
334 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
336 With no FILE, or when FILE is -, read standard input.\n\
338 *** WARNING ***\n\
339 The locale specified by the environment affects sort order.\n\
340 Set LC_ALL=C to get the traditional sort order that uses\n\
341 native byte values.\n\
342 "), stdout );
343 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
346 exit (status);
349 #define COMMON_SHORT_OPTIONS "-bcdfgik:mMno:rsS:t:T:uz"
351 static struct option const long_options[] =
353 {"ignore-leading-blanks", no_argument, NULL, 'b'},
354 {"check", no_argument, NULL, 'c'},
355 {"dictionary-order", no_argument, NULL, 'd'},
356 {"ignore-case", no_argument, NULL, 'f'},
357 {"general-numeric-sort", no_argument, NULL, 'g'},
358 {"ignore-nonprinting", no_argument, NULL, 'i'},
359 {"key", required_argument, NULL, 'k'},
360 {"merge", no_argument, NULL, 'm'},
361 {"month-sort", no_argument, NULL, 'M'},
362 {"numeric-sort", no_argument, NULL, 'n'},
363 {"output", required_argument, NULL, 'o'},
364 {"reverse", no_argument, NULL, 'r'},
365 {"stable", no_argument, NULL, 's'},
366 {"buffer-size", required_argument, NULL, 'S'},
367 {"field-separator", required_argument, NULL, 't'},
368 {"temporary-directory", required_argument, NULL, 'T'},
369 {"unique", no_argument, NULL, 'u'},
370 {"zero-terminated", no_argument, NULL, 'z'},
371 {GETOPT_HELP_OPTION_DECL},
372 {GETOPT_VERSION_OPTION_DECL},
373 {0, 0, 0, 0},
376 /* The set of signals that are caught. */
377 static sigset_t caught_signals;
379 /* The list of temporary files. */
380 struct tempnode
382 struct tempnode *volatile next;
383 char name[1]; /* Actual size is 1 + file name length. */
385 static struct tempnode *volatile temphead;
387 /* Clean up any remaining temporary files. */
389 static void
390 cleanup (void)
392 struct tempnode const *node;
394 for (node = temphead; node; node = node->next)
395 unlink (node->name);
398 /* Report MESSAGE for FILE, then clean up and exit. */
400 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
401 static void
402 die (char const *message, char const *file)
404 error (0, errno, "%s: %s", message, file);
405 exit (SORT_FAILURE);
408 /* Create a new temporary file, returning its newly allocated name.
409 Store into *PFP a stream open for writing. */
411 static char *
412 create_temp_file (FILE **pfp)
414 static char const slashbase[] = "/sortXXXXXX";
415 static size_t temp_dir_index;
416 sigset_t oldset;
417 int fd;
418 int saved_errno;
419 char const *temp_dir = temp_dirs[temp_dir_index];
420 size_t len = strlen (temp_dir);
421 struct tempnode *node =
422 xmalloc (sizeof node->next + len + sizeof slashbase);
423 char *file = node->name;
425 memcpy (file, temp_dir, len);
426 memcpy (file + len, slashbase, sizeof slashbase);
427 node->next = temphead;
428 if (++temp_dir_index == temp_dir_count)
429 temp_dir_index = 0;
431 /* Create the temporary file in a critical section, to avoid races. */
432 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
433 fd = mkstemp (file);
434 if (0 <= fd)
435 temphead = node;
436 saved_errno = errno;
437 sigprocmask (SIG_SETMASK, &oldset, NULL);
438 errno = saved_errno;
440 if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
441 die (_("cannot create temporary file"), file);
443 return file;
446 static FILE *
447 xfopen (const char *file, const char *how)
449 FILE *fp;
451 if (STREQ (file, "-"))
453 if (*how == 'r')
455 have_read_stdin = true;
456 fp = stdin;
458 else
459 fp = stdout;
461 else
463 if ((fp = fopen_safer (file, how)) == NULL)
464 die (_("open failed"), file);
467 return fp;
470 /* Close FP, whose name is FILE, and report any errors. */
472 static void
473 xfclose (FILE *fp, char const *file)
475 if (fp == stdin)
477 /* Allow reading stdin from tty more than once. */
478 if (feof (fp))
479 clearerr (fp);
481 else
483 if (fclose (fp) != 0)
484 die (_("close failed"), file);
488 static void
489 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
491 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
492 die (_("write failed"), output_file);
495 /* Append DIR to the array of temporary directory names. */
496 static void
497 add_temp_dir (char const *dir)
499 if (temp_dir_count == temp_dir_alloc)
500 temp_dirs = x2nrealloc (temp_dirs, &temp_dir_alloc, sizeof *temp_dirs);
502 temp_dirs[temp_dir_count++] = dir;
505 /* Search through the list of temporary files for NAME;
506 remove it if it is found on the list. */
508 static void
509 zaptemp (const char *name)
511 struct tempnode *volatile *pnode;
512 struct tempnode *node;
514 for (pnode = &temphead; (node = *pnode); pnode = &node->next)
515 if (node->name == name)
517 unlink (name);
518 *pnode = node->next;
519 free (node);
520 break;
524 #if HAVE_NL_LANGINFO
526 static int
527 struct_month_cmp (const void *m1, const void *m2)
529 struct month const *month1 = m1;
530 struct month const *month2 = m2;
531 return strcmp (month1->name, month2->name);
534 #endif
536 /* Initialize the character class tables. */
538 static void
539 inittables (void)
541 int i;
543 for (i = 0; i < UCHAR_LIM; ++i)
545 blanks[i] = !!ISBLANK (i);
546 nonprinting[i] = !ISPRINT (i);
547 nondictionary[i] = !ISALNUM (i) && !ISBLANK (i);
548 fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i);
551 #if HAVE_NL_LANGINFO
552 /* If we're not in the "C" locale, read different names for months. */
553 if (hard_LC_TIME)
555 for (i = 0; i < MONTHS_PER_YEAR; i++)
557 char const *s;
558 size_t s_len;
559 size_t j;
560 char *name;
562 s = (char *) nl_langinfo (ABMON_1 + i);
563 s_len = strlen (s);
564 monthtab[i].name = name = xmalloc (s_len + 1);
565 monthtab[i].val = i + 1;
567 for (j = 0; j < s_len; j++)
568 name[j] = fold_toupper[UCHAR (s[j])];
569 name[j] = '\0';
571 qsort ((void *) monthtab, MONTHS_PER_YEAR,
572 sizeof *monthtab, struct_month_cmp);
574 #endif
577 /* Specify the amount of main memory to use when sorting. */
578 static void
579 specify_sort_size (char const *s)
581 uintmax_t n;
582 char *suffix;
583 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
585 /* The default unit is KiB. */
586 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
588 if (n <= UINTMAX_MAX / 1024)
589 n *= 1024;
590 else
591 e = LONGINT_OVERFLOW;
594 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
595 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
596 switch (suffix[0])
598 case 'b':
599 e = LONGINT_OK;
600 break;
602 case '%':
604 double mem = physmem_total () * n / 100;
606 /* Use "<", not "<=", to avoid problems with rounding. */
607 if (mem < UINTMAX_MAX)
609 n = mem;
610 e = LONGINT_OK;
612 else
613 e = LONGINT_OVERFLOW;
615 break;
618 if (e == LONGINT_OK)
620 /* If multiple sort sizes are specified, take the maximum, so
621 that option order does not matter. */
622 if (n < sort_size)
623 return;
625 sort_size = n;
626 if (sort_size == n)
628 sort_size = MAX (sort_size, MIN_SORT_SIZE);
629 return;
632 e = LONGINT_OVERFLOW;
635 STRTOL_FATAL_ERROR (s, _("sort size"), e);
638 /* Return the default sort size. */
639 static size_t
640 default_sort_size (void)
642 /* Let MEM be available memory or 1/8 of total memory, whichever
643 is greater. */
644 double avail = physmem_available ();
645 double total = physmem_total ();
646 double mem = MAX (avail, total / 8);
647 struct rlimit rlimit;
649 /* Let SIZE be MEM, but no more than the maximum object size or
650 system resource limits. Avoid the MIN macro here, as it is not
651 quite right when only one argument is floating point. Don't
652 bother to check for values like RLIM_INFINITY since in practice
653 they are not much less than SIZE_MAX. */
654 size_t size = SIZE_MAX;
655 if (mem < size)
656 size = mem;
657 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
658 size = rlimit.rlim_cur;
659 #ifdef RLIMIT_AS
660 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
661 size = rlimit.rlim_cur;
662 #endif
664 /* Leave a large safety margin for the above limits, as failure can
665 occur when they are exceeded. */
666 size /= 2;
668 #ifdef RLIMIT_RSS
669 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
670 Exceeding RSS is not fatal, but can be quite slow. */
671 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
672 size = rlimit.rlim_cur / 16 * 15;
673 #endif
675 /* Use no less than the minimum. */
676 return MAX (size, MIN_SORT_SIZE);
679 /* Return the sort buffer size to use with the input files identified
680 by FPS and FILES, which are alternate paths to the same files.
681 NFILES gives the number of input files; NFPS may be less. Assume
682 that each input line requires LINE_BYTES extra bytes' worth of line
683 information. Do not exceed a bound on the size: if the bound is
684 not specified by the user, use a default. */
686 static size_t
687 sort_buffer_size (FILE *const *fps, int nfps,
688 char *const *files, int nfiles,
689 size_t line_bytes)
691 /* A bound on the input size. If zero, the bound hasn't been
692 determined yet. */
693 static size_t size_bound;
695 /* In the worst case, each input byte is a newline. */
696 size_t worst_case_per_input_byte = line_bytes + 1;
698 /* Keep enough room for one extra input line and an extra byte.
699 This extra room might be needed when preparing to read EOF. */
700 size_t size = worst_case_per_input_byte + 1;
702 int i;
704 for (i = 0; i < nfiles; i++)
706 struct stat st;
707 off_t file_size;
708 size_t worst_case;
710 if ((i < nfps ? fstat (fileno (fps[i]), &st)
711 : strcmp (files[i], "-") == 0 ? fstat (STDIN_FILENO, &st)
712 : stat (files[i], &st))
713 != 0)
714 die (_("stat failed"), files[i]);
716 if (S_ISREG (st.st_mode))
717 file_size = st.st_size;
718 else
720 /* The file has unknown size. If the user specified a sort
721 buffer size, use that; otherwise, guess the size. */
722 if (sort_size)
723 return sort_size;
724 file_size = INPUT_FILE_SIZE_GUESS;
727 if (! size_bound)
729 size_bound = sort_size;
730 if (! size_bound)
731 size_bound = default_sort_size ();
734 /* Add the amount of memory needed to represent the worst case
735 where the input consists entirely of newlines followed by a
736 single non-newline. Check for overflow. */
737 worst_case = file_size * worst_case_per_input_byte + 1;
738 if (file_size != worst_case / worst_case_per_input_byte
739 || size_bound - size <= worst_case)
740 return size_bound;
741 size += worst_case;
744 return size;
747 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
748 must be at least sizeof (struct line). Allocate ALLOC bytes
749 initially. */
751 static void
752 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
754 /* Ensure that the line array is properly aligned. If the desired
755 size cannot be allocated, repeatedly halve it until allocation
756 succeeds. The smaller allocation may hurt overall performance,
757 but that's better than failing. */
758 for (;;)
760 alloc += sizeof (struct line) - alloc % sizeof (struct line);
761 buf->buf = malloc (alloc);
762 if (buf->buf)
763 break;
764 alloc /= 2;
765 if (alloc <= line_bytes + 1)
766 xalloc_die ();
769 buf->line_bytes = line_bytes;
770 buf->alloc = alloc;
771 buf->used = buf->left = buf->nlines = 0;
772 buf->eof = false;
775 /* Return one past the limit of the line array. */
777 static inline struct line *
778 buffer_linelim (struct buffer const *buf)
780 return (struct line *) (buf->buf + buf->alloc);
783 /* Return a pointer to the first character of the field specified
784 by KEY in LINE. */
786 static char *
787 begfield (const struct line *line, const struct keyfield *key)
789 register char *ptr = line->text, *lim = ptr + line->length - 1;
790 register size_t sword = key->sword;
791 register size_t schar = key->schar;
792 register size_t remaining_bytes;
794 /* The leading field separator itself is included in a field when -t
795 is absent. */
797 if (tab != TAB_DEFAULT)
798 while (ptr < lim && sword--)
800 while (ptr < lim && *ptr != tab)
801 ++ptr;
802 if (ptr < lim)
803 ++ptr;
805 else
806 while (ptr < lim && sword--)
808 while (ptr < lim && blanks[UCHAR (*ptr)])
809 ++ptr;
810 while (ptr < lim && !blanks[UCHAR (*ptr)])
811 ++ptr;
814 if (key->skipsblanks)
815 while (ptr < lim && blanks[UCHAR (*ptr)])
816 ++ptr;
818 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
819 remaining_bytes = lim - ptr;
820 if (schar < remaining_bytes)
821 ptr += schar;
822 else
823 ptr = lim;
825 return ptr;
828 /* Return the limit of (a pointer to the first character after) the field
829 in LINE specified by KEY. */
831 static char *
832 limfield (const struct line *line, const struct keyfield *key)
834 register char *ptr = line->text, *lim = ptr + line->length - 1;
835 register size_t eword = key->eword, echar = key->echar;
836 register size_t remaining_bytes;
838 /* Move PTR past EWORD fields or to one past the last byte on LINE,
839 whichever comes first. If there are more than EWORD fields, leave
840 PTR pointing at the beginning of the field having zero-based index,
841 EWORD. If a delimiter character was specified (via -t), then that
842 `beginning' is the first character following the delimiting TAB.
843 Otherwise, leave PTR pointing at the first `blank' character after
844 the preceding field. */
845 if (tab != TAB_DEFAULT)
846 while (ptr < lim && eword--)
848 while (ptr < lim && *ptr != tab)
849 ++ptr;
850 if (ptr < lim && (eword | echar))
851 ++ptr;
853 else
854 while (ptr < lim && eword--)
856 while (ptr < lim && blanks[UCHAR (*ptr)])
857 ++ptr;
858 while (ptr < lim && !blanks[UCHAR (*ptr)])
859 ++ptr;
862 #ifdef POSIX_UNSPECIFIED
863 /* The following block of code makes GNU sort incompatible with
864 standard Unix sort, so it's ifdef'd out for now.
865 The POSIX spec isn't clear on how to interpret this.
866 FIXME: request clarification.
868 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
869 Date: Thu, 30 May 96 12:20:41 -0400
870 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
872 [...]I believe I've found another bug in `sort'.
874 $ cat /tmp/sort.in
875 a b c 2 d
876 pq rs 1 t
877 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
878 a b c 2 d
879 pq rs 1 t
880 $ /bin/sort -k1.7,1.7 </tmp/sort.in
881 pq rs 1 t
882 a b c 2 d
884 Unix sort produced the answer I expected: sort on the single character
885 in column 7. GNU sort produced different results, because it disagrees
886 on the interpretation of the key-end spec "M.N". Unix sort reads this
887 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
888 "skip M-1 fields, then either N-1 characters or the rest of the current
889 field, whichever comes first". This extra clause applies only to
890 key-ends, not key-starts.
893 /* Make LIM point to the end of (one byte past) the current field. */
894 if (tab != TAB_DEFAULT)
896 char *newlim;
897 newlim = memchr (ptr, tab, lim - ptr);
898 if (newlim)
899 lim = newlim;
901 else
903 char *newlim;
904 newlim = ptr;
905 while (newlim < lim && blanks[UCHAR (*newlim)])
906 ++newlim;
907 while (newlim < lim && !blanks[UCHAR (*newlim)])
908 ++newlim;
909 lim = newlim;
911 #endif
913 /* If we're skipping leading blanks, don't start counting characters
914 until after skipping past any leading blanks. */
915 if (key->skipsblanks)
916 while (ptr < lim && blanks[UCHAR (*ptr)])
917 ++ptr;
919 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
920 remaining_bytes = lim - ptr;
921 if (echar < remaining_bytes)
922 ptr += echar;
923 else
924 ptr = lim;
926 return ptr;
929 /* Return the number of trailing blanks in FIELD, with LEN bytes. */
931 static size_t
932 trailing_blanks (char const *field, size_t len)
934 size_t i;
935 for (i = len; 0 < i && blanks[UCHAR (field[i - 1])]; i--)
936 continue;
937 return len - i;
940 /* Fill BUF reading from FP, moving buf->left bytes from the end
941 of buf->buf to the beginning first. If EOF is reached and the
942 file wasn't terminated by a newline, supply one. Set up BUF's line
943 table too. FILE is the name of the file corresponding to FP.
944 Return true if some input was read. */
946 static bool
947 fillbuf (struct buffer *buf, register FILE *fp, char const *file)
949 struct keyfield const *key = keylist;
950 char eol = eolchar;
951 size_t line_bytes = buf->line_bytes;
952 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
954 if (buf->eof)
955 return false;
957 if (buf->used != buf->left)
959 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
960 buf->used = buf->left;
961 buf->nlines = 0;
964 for (;;)
966 char *ptr = buf->buf + buf->used;
967 struct line *linelim = buffer_linelim (buf);
968 struct line *line = linelim - buf->nlines;
969 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
970 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
972 while (line_bytes + 1 < avail)
974 /* Read as many bytes as possible, but do not read so many
975 bytes that there might not be enough room for the
976 corresponding line array. The worst case is when the
977 rest of the input file consists entirely of newlines,
978 except that the last byte is not a newline. */
979 size_t readsize = (avail - 1) / (line_bytes + 1);
980 size_t bytes_read = fread (ptr, 1, readsize, fp);
981 char *ptrlim = ptr + bytes_read;
982 char *p;
983 avail -= bytes_read;
985 if (bytes_read != readsize)
987 if (ferror (fp))
988 die (_("read failed"), file);
989 if (feof (fp))
991 buf->eof = true;
992 if (buf->buf == ptrlim)
993 return false;
994 if (ptrlim[-1] != eol)
995 *ptrlim++ = eol;
999 /* Find and record each line in the just-read input. */
1000 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1002 ptr = p + 1;
1003 line--;
1004 line->text = line_start;
1005 line->length = ptr - line_start;
1006 mergesize = MAX (mergesize, line->length);
1007 avail -= line_bytes;
1009 if (key)
1011 /* Precompute the position of the first key for
1012 efficiency. */
1013 line->keylim = (key->eword == SIZE_MAX
1015 : limfield (line, key));
1017 if (key->sword != SIZE_MAX)
1018 line->keybeg = begfield (line, key);
1019 else
1021 if (key->skipsblanks)
1022 while (blanks[UCHAR (*line_start)])
1023 line_start++;
1024 line->keybeg = line_start;
1026 if (key->skipeblanks)
1028 size_t keylen = line->keylim - line->keybeg;
1029 line->keylim -= trailing_blanks (line->keybeg, keylen);
1033 line_start = ptr;
1036 ptr = ptrlim;
1037 if (buf->eof)
1038 break;
1041 buf->used = ptr - buf->buf;
1042 buf->nlines = buffer_linelim (buf) - line;
1043 if (buf->nlines != 0)
1045 buf->left = ptr - line_start;
1046 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1047 return true;
1050 /* The current input line is too long to fit in the buffer.
1051 Double the buffer size and try again. */
1052 buf->buf = x2nrealloc (buf->buf, &buf->alloc, sizeof *(buf->buf));
1056 /* Compare strings A and B containing decimal fractions < 1. Each string
1057 should begin with a decimal point followed immediately by the digits
1058 of the fraction. Strings not of this form are considered to be zero. */
1060 /* The goal here, is to take two numbers a and b... compare these
1061 in parallel. Instead of converting each, and then comparing the
1062 outcome. Most likely stopping the comparison before the conversion
1063 is complete. The algorithm used, in the old sort:
1065 Algorithm: fraccompare
1066 Action : compare two decimal fractions
1067 accepts : char *a, char *b
1068 returns : -1 if a<b, 0 if a=b, 1 if a>b.
1069 implement:
1071 if *a == decimal_point AND *b == decimal_point
1072 find first character different in a and b.
1073 if both are digits, return the difference *a - *b.
1074 if *a is a digit
1075 skip past zeros
1076 if digit return 1, else 0
1077 if *b is a digit
1078 skip past zeros
1079 if digit return -1, else 0
1080 if *a is a decimal_point
1081 skip past decimal_point and zeros
1082 if digit return 1, else 0
1083 if *b is a decimal_point
1084 skip past decimal_point and zeros
1085 if digit return -1, else 0
1086 return 0 */
1088 static int
1089 fraccompare (register const char *a, register const char *b)
1091 if (*a == decimal_point && *b == decimal_point)
1093 while (*++a == *++b)
1094 if (! ISDIGIT (*a))
1095 return 0;
1096 if (ISDIGIT (*a) && ISDIGIT (*b))
1097 return *a - *b;
1098 if (ISDIGIT (*a))
1099 goto a_trailing_nonzero;
1100 if (ISDIGIT (*b))
1101 goto b_trailing_nonzero;
1102 return 0;
1104 else if (*a++ == decimal_point)
1106 a_trailing_nonzero:
1107 while (*a == NUMERIC_ZERO)
1108 a++;
1109 return ISDIGIT (*a);
1111 else if (*b++ == decimal_point)
1113 b_trailing_nonzero:
1114 while (*b == NUMERIC_ZERO)
1115 b++;
1116 return - ISDIGIT (*b);
1118 return 0;
1121 /* Compare strings A and B as numbers without explicitly converting them to
1122 machine numbers. Comparatively slow for short strings, but asymptotically
1123 hideously fast. */
1125 static int
1126 numcompare (register const char *a, register const char *b)
1128 register int tmpa, tmpb, tmp;
1129 register size_t log_a, log_b;
1131 tmpa = *a;
1132 tmpb = *b;
1134 while (blanks[UCHAR (tmpa)])
1135 tmpa = *++a;
1136 while (blanks[UCHAR (tmpb)])
1137 tmpb = *++b;
1139 if (tmpa == NEGATION_SIGN)
1142 tmpa = *++a;
1143 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa));
1144 if (tmpb != NEGATION_SIGN)
1146 if (tmpa == decimal_point)
1148 tmpa = *++a;
1149 while (tmpa == NUMERIC_ZERO);
1150 if (ISDIGIT (tmpa))
1151 return -1;
1152 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1153 tmpb = *++b;
1154 if (tmpb == decimal_point)
1156 tmpb = *++b;
1157 while (tmpb == NUMERIC_ZERO);
1158 if (ISDIGIT (tmpb))
1159 return -1;
1160 return 0;
1163 tmpb = *++b;
1164 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1166 while (tmpa == tmpb && ISDIGIT (tmpa))
1169 tmpa = *++a;
1170 while (IS_THOUSANDS_SEP (tmpa));
1172 tmpb = *++b;
1173 while (IS_THOUSANDS_SEP (tmpb));
1176 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1177 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1178 return -fraccompare (a, b);
1180 tmp = tmpb - tmpa;
1182 for (log_a = 0; ISDIGIT (tmpa); ++log_a)
1184 tmpa = *++a;
1185 while (IS_THOUSANDS_SEP (tmpa));
1187 for (log_b = 0; ISDIGIT (tmpb); ++log_b)
1189 tmpb = *++b;
1190 while (IS_THOUSANDS_SEP (tmpb));
1192 if (log_a != log_b)
1193 return log_a < log_b ? 1 : -1;
1195 if (!log_a)
1196 return 0;
1198 return tmp;
1200 else if (tmpb == NEGATION_SIGN)
1203 tmpb = *++b;
1204 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1205 if (tmpb == decimal_point)
1207 tmpb = *++b;
1208 while (tmpb == NUMERIC_ZERO);
1209 if (ISDIGIT (tmpb))
1210 return 1;
1211 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1212 tmpa = *++a;
1213 if (tmpa == decimal_point)
1215 tmpa = *++a;
1216 while (tmpa == NUMERIC_ZERO);
1217 if (ISDIGIT (tmpa))
1218 return 1;
1219 return 0;
1221 else
1223 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1224 tmpa = *++a;
1225 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1226 tmpb = *++b;
1228 while (tmpa == tmpb && ISDIGIT (tmpa))
1231 tmpa = *++a;
1232 while (IS_THOUSANDS_SEP (tmpa));
1234 tmpb = *++b;
1235 while (IS_THOUSANDS_SEP (tmpb));
1238 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1239 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1240 return fraccompare (a, b);
1242 tmp = tmpa - tmpb;
1244 for (log_a = 0; ISDIGIT (tmpa); ++log_a)
1246 tmpa = *++a;
1247 while (IS_THOUSANDS_SEP (tmpa));
1249 for (log_b = 0; ISDIGIT (tmpb); ++log_b)
1251 tmpb = *++b;
1252 while (IS_THOUSANDS_SEP (tmpb));
1254 if (log_a != log_b)
1255 return log_a < log_b ? -1 : 1;
1257 if (!log_a)
1258 return 0;
1260 return tmp;
1264 static int
1265 general_numcompare (const char *sa, const char *sb)
1267 /* FIXME: add option to warn about failed conversions. */
1268 /* FIXME: maybe add option to try expensive FP conversion
1269 only if A and B can't be compared more cheaply/accurately. */
1271 char *ea;
1272 char *eb;
1273 double a = strtod (sa, &ea);
1274 double b = strtod (sb, &eb);
1276 /* Put conversion errors at the start of the collating sequence. */
1277 if (sa == ea)
1278 return sb == eb ? 0 : -1;
1279 if (sb == eb)
1280 return 1;
1282 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1283 conversion errors but before numbers; sort them by internal
1284 bit-pattern, for lack of a more portable alternative. */
1285 return (a < b ? -1
1286 : a > b ? 1
1287 : a == b ? 0
1288 : b == b ? -1
1289 : a == a ? 1
1290 : memcmp ((char *) &a, (char *) &b, sizeof a));
1293 /* Return an integer in 1..12 of the month name S with length LEN.
1294 Return 0 if the name in S is not recognized. */
1296 static int
1297 getmonth (const char *s, size_t len)
1299 char *month;
1300 register size_t i;
1301 register int lo = 0, hi = MONTHS_PER_YEAR, result;
1303 while (len > 0 && blanks[UCHAR (*s)])
1305 ++s;
1306 --len;
1309 if (len == 0)
1310 return 0;
1312 month = alloca (len + 1);
1313 for (i = 0; i < len; ++i)
1314 month[i] = fold_toupper[UCHAR (s[i])];
1315 len -= trailing_blanks (month, len);
1316 month[len] = '\0';
1320 int ix = (lo + hi) / 2;
1322 if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
1323 hi = ix;
1324 else
1325 lo = ix;
1327 while (hi - lo > 1);
1329 result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
1330 ? monthtab[lo].val : 0);
1332 return result;
1335 /* Compare two lines A and B trying every key in sequence until there
1336 are no more keys or a difference is found. */
1338 static int
1339 keycompare (const struct line *a, const struct line *b)
1341 struct keyfield const *key = keylist;
1343 /* For the first iteration only, the key positions have been
1344 precomputed for us. */
1345 register char *texta = a->keybeg;
1346 register char *textb = b->keybeg;
1347 register char *lima = a->keylim;
1348 register char *limb = b->keylim;
1350 int diff;
1352 for (;;)
1354 register char const *translate = key->translate;
1355 register bool const *ignore = key->ignore;
1357 /* Find the lengths. */
1358 size_t lena = lima <= texta ? 0 : lima - texta;
1359 size_t lenb = limb <= textb ? 0 : limb - textb;
1361 if (key->skipeblanks)
1363 lena -= trailing_blanks (texta, lena);
1364 lenb -= trailing_blanks (textb, lenb);
1367 /* Actually compare the fields. */
1368 if (key->numeric | key->general_numeric)
1370 char savea = *lima, saveb = *limb;
1372 *lima = *limb = '\0';
1373 diff = ((key->numeric ? numcompare : general_numcompare)
1374 (texta, textb));
1375 *lima = savea, *limb = saveb;
1377 else if (key->month)
1378 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1379 /* Sorting like this may become slow, so in a simple locale the user
1380 can select a faster sort that is similar to ascii sort */
1381 else if (HAVE_SETLOCALE && hard_LC_COLLATE)
1383 if (ignore || translate)
1385 char *copy_a = alloca (lena + 1 + lenb + 1);
1386 char *copy_b = copy_a + lena + 1;
1387 size_t new_len_a, new_len_b, i;
1389 /* Ignore and/or translate chars before comparing. */
1390 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1392 if (i < lena)
1394 copy_a[new_len_a] = (translate
1395 ? translate[UCHAR (texta[i])]
1396 : texta[i]);
1397 if (!ignore || !ignore[UCHAR (texta[i])])
1398 ++new_len_a;
1400 if (i < lenb)
1402 copy_b[new_len_b] = (translate
1403 ? translate[UCHAR (textb[i])]
1404 : textb [i]);
1405 if (!ignore || !ignore[UCHAR (textb[i])])
1406 ++new_len_b;
1410 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1412 else if (lena == 0)
1413 diff = - NONZERO (lenb);
1414 else if (lenb == 0)
1415 goto greater;
1416 else
1417 diff = xmemcoll (texta, lena, textb, lenb);
1419 else if (ignore)
1421 #define CMP_WITH_IGNORE(A, B) \
1422 do \
1424 for (;;) \
1426 while (texta < lima && ignore[UCHAR (*texta)]) \
1427 ++texta; \
1428 while (textb < limb && ignore[UCHAR (*textb)]) \
1429 ++textb; \
1430 if (! (texta < lima && textb < limb)) \
1431 break; \
1432 diff = UCHAR (A) - UCHAR (B); \
1433 if (diff) \
1434 goto not_equal; \
1435 ++texta; \
1436 ++textb; \
1439 diff = (texta < lima) - (textb < limb); \
1441 while (0)
1443 if (translate)
1444 CMP_WITH_IGNORE (translate[UCHAR (*texta)],
1445 translate[UCHAR (*textb)]);
1446 else
1447 CMP_WITH_IGNORE (*texta, *textb);
1449 else if (lena == 0)
1450 diff = - NONZERO (lenb);
1451 else if (lenb == 0)
1452 goto greater;
1453 else
1455 if (translate)
1457 while (texta < lima && textb < limb)
1459 diff = (UCHAR (translate[UCHAR (*texta++)])
1460 - UCHAR (translate[UCHAR (*textb++)]));
1461 if (diff)
1462 goto not_equal;
1465 else
1467 diff = memcmp (texta, textb, MIN (lena, lenb));
1468 if (diff)
1469 goto not_equal;
1471 diff = lena < lenb ? -1 : lena != lenb;
1474 if (diff)
1475 goto not_equal;
1477 key = key->next;
1478 if (! key)
1479 break;
1481 /* Find the beginning and limit of the next field. */
1482 if (key->eword != SIZE_MAX)
1483 lima = limfield (a, key), limb = limfield (b, key);
1484 else
1485 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1487 if (key->sword != SIZE_MAX)
1488 texta = begfield (a, key), textb = begfield (b, key);
1489 else
1491 texta = a->text, textb = b->text;
1492 if (key->skipsblanks)
1494 while (texta < lima && blanks[UCHAR (*texta)])
1495 ++texta;
1496 while (textb < limb && blanks[UCHAR (*textb)])
1497 ++textb;
1502 return 0;
1504 greater:
1505 diff = 1;
1506 not_equal:
1507 return key->reverse ? -diff : diff;
1510 /* Compare two lines A and B, returning negative, zero, or positive
1511 depending on whether A compares less than, equal to, or greater than B. */
1513 static int
1514 compare (register const struct line *a, register const struct line *b)
1516 int diff;
1517 size_t alen, blen;
1519 /* First try to compare on the specified keys (if any).
1520 The only two cases with no key at all are unadorned sort,
1521 and unadorned sort -r. */
1522 if (keylist)
1524 diff = keycompare (a, b);
1525 alloca (0);
1526 if (diff | unique | stable)
1527 return diff;
1530 /* If the keys all compare equal (or no keys were specified)
1531 fall through to the default comparison. */
1532 alen = a->length - 1, blen = b->length - 1;
1534 if (alen == 0)
1535 diff = - NONZERO (blen);
1536 else if (blen == 0)
1537 diff = 1;
1538 else if (HAVE_SETLOCALE && hard_LC_COLLATE)
1539 diff = xmemcoll (a->text, alen, b->text, blen);
1540 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1541 diff = alen < blen ? -1 : alen != blen;
1543 return reverse ? -diff : diff;
1546 /* Check that the lines read from FILE_NAME come in order. Print a
1547 diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1548 false if they are not in order. Otherwise, print no diagnostic
1549 and return true. */
1551 static bool
1552 check (char const *file_name)
1554 FILE *fp = xfopen (file_name, "r");
1555 struct buffer buf; /* Input buffer. */
1556 struct line temp; /* Copy of previous line. */
1557 size_t alloc = 0;
1558 uintmax_t line_number = 0;
1559 struct keyfield const *key = keylist;
1560 bool nonunique = ! unique;
1561 bool ordered = true;
1563 initbuf (&buf, sizeof (struct line),
1564 MAX (merge_buffer_size, sort_size));
1565 temp.text = NULL;
1567 while (fillbuf (&buf, fp, file_name))
1569 struct line const *line = buffer_linelim (&buf);
1570 struct line const *linebase = line - buf.nlines;
1572 /* Make sure the line saved from the old buffer contents is
1573 less than or equal to the first line of the new buffer. */
1574 if (alloc && nonunique <= compare (&temp, line - 1))
1576 found_disorder:
1578 struct line const *disorder_line = line - 1;
1579 uintmax_t disorder_line_number =
1580 buffer_linelim (&buf) - disorder_line + line_number;
1581 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1582 fprintf (stderr, _("%s: %s:%s: disorder: "),
1583 program_name, file_name,
1584 umaxtostr (disorder_line_number, hr_buf));
1585 write_bytes (disorder_line->text, disorder_line->length, stderr,
1586 _("standard error"));
1587 ordered = false;
1588 break;
1592 /* Compare each line in the buffer with its successor. */
1593 while (linebase < --line)
1594 if (nonunique <= compare (line, line - 1))
1595 goto found_disorder;
1597 line_number += buf.nlines;
1599 /* Save the last line of the buffer. */
1600 if (alloc < line->length)
1604 alloc *= 2;
1605 if (! alloc)
1607 alloc = line->length;
1608 break;
1611 while (alloc < line->length);
1613 temp.text = xrealloc (temp.text, alloc);
1615 memcpy (temp.text, line->text, line->length);
1616 temp.length = line->length;
1617 if (key)
1619 temp.keybeg = temp.text + (line->keybeg - line->text);
1620 temp.keylim = temp.text + (line->keylim - line->text);
1624 xfclose (fp, file_name);
1625 free (buf.buf);
1626 if (temp.text)
1627 free (temp.text);
1628 return ordered;
1631 /* Merge lines from FILES onto OFP. NFILES cannot be greater than
1632 NMERGE. Close input and output files before returning.
1633 OUTPUT_FILE gives the name of the output file; if OFP is NULL, the
1634 output file has not been opened yet. */
1636 static void
1637 mergefps (char **files, register int nfiles,
1638 FILE *ofp, const char *output_file)
1640 FILE *fps[NMERGE]; /* Input streams for each file. */
1641 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1642 struct line saved; /* Saved line storage for unique check. */
1643 struct line const *savedline = NULL;
1644 /* &saved if there is a saved line. */
1645 size_t savealloc = 0; /* Size allocated for the saved line. */
1646 struct line const *cur[NMERGE]; /* Current line in each line table. */
1647 struct line const *base[NMERGE]; /* Base of each line table. */
1648 int ord[NMERGE]; /* Table representing a permutation of fps,
1649 such that cur[ord[0]] is the smallest line
1650 and will be next output. */
1651 register int i, j, t;
1652 struct keyfield const *key = keylist;
1653 saved.text = NULL;
1655 /* Read initial lines from each input file. */
1656 for (i = 0; i < nfiles; )
1658 fps[i] = xfopen (files[i], "r");
1659 initbuf (&buffer[i], sizeof (struct line),
1660 MAX (merge_buffer_size, sort_size / nfiles));
1661 if (fillbuf (&buffer[i], fps[i], files[i]))
1663 struct line const *linelim = buffer_linelim (&buffer[i]);
1664 cur[i] = linelim - 1;
1665 base[i] = linelim - buffer[i].nlines;
1666 i++;
1668 else
1670 /* fps[i] is empty; eliminate it from future consideration. */
1671 xfclose (fps[i], files[i]);
1672 zaptemp (files[i]);
1673 free (buffer[i].buf);
1674 --nfiles;
1675 for (j = i; j < nfiles; ++j)
1676 files[j] = files[j + 1];
1680 if (! ofp)
1681 ofp = xfopen (output_file, "w");
1683 /* Set up the ord table according to comparisons among input lines.
1684 Since this only reorders two items if one is strictly greater than
1685 the other, it is stable. */
1686 for (i = 0; i < nfiles; ++i)
1687 ord[i] = i;
1688 for (i = 1; i < nfiles; ++i)
1689 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
1690 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1692 /* Repeatedly output the smallest line until no input remains. */
1693 while (nfiles)
1695 struct line const *smallest = cur[ord[0]];
1697 /* If uniquified output is turned on, output only the first of
1698 an identical series of lines. */
1699 if (unique)
1701 if (savedline && compare (savedline, smallest))
1703 savedline = 0;
1704 write_bytes (saved.text, saved.length, ofp, output_file);
1706 if (!savedline)
1708 savedline = &saved;
1709 if (savealloc < smallest->length)
1712 if (! savealloc)
1714 savealloc = smallest->length;
1715 break;
1717 while ((savealloc *= 2) < smallest->length);
1719 saved.text = xrealloc (saved.text, savealloc);
1721 saved.length = smallest->length;
1722 memcpy (saved.text, smallest->text, saved.length);
1723 if (key)
1725 saved.keybeg =
1726 saved.text + (smallest->keybeg - smallest->text);
1727 saved.keylim =
1728 saved.text + (smallest->keylim - smallest->text);
1732 else
1733 write_bytes (smallest->text, smallest->length, ofp, output_file);
1735 /* Check if we need to read more lines into core. */
1736 if (base[ord[0]] < smallest)
1737 cur[ord[0]] = smallest - 1;
1738 else
1740 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
1742 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
1743 cur[ord[0]] = linelim - 1;
1744 base[ord[0]] = linelim - buffer[ord[0]].nlines;
1746 else
1748 /* We reached EOF on fps[ord[0]]. */
1749 for (i = 1; i < nfiles; ++i)
1750 if (ord[i] > ord[0])
1751 --ord[i];
1752 --nfiles;
1753 xfclose (fps[ord[0]], files[ord[0]]);
1754 zaptemp (files[ord[0]]);
1755 free (buffer[ord[0]].buf);
1756 for (i = ord[0]; i < nfiles; ++i)
1758 fps[i] = fps[i + 1];
1759 files[i] = files[i + 1];
1760 buffer[i] = buffer[i + 1];
1761 cur[i] = cur[i + 1];
1762 base[i] = base[i + 1];
1764 for (i = 0; i < nfiles; ++i)
1765 ord[i] = ord[i + 1];
1766 continue;
1770 /* The new line just read in may be larger than other lines
1771 already in core; push it back in the queue until we encounter
1772 a line larger than it. */
1773 for (i = 1; i < nfiles; ++i)
1775 t = compare (cur[ord[0]], cur[ord[i]]);
1776 if (!t)
1777 t = ord[0] - ord[i];
1778 if (t < 0)
1779 break;
1781 t = ord[0];
1782 for (j = 1; j < i; ++j)
1783 ord[j - 1] = ord[j];
1784 ord[i - 1] = t;
1787 if (unique && savedline)
1789 write_bytes (saved.text, saved.length, ofp, output_file);
1790 free (saved.text);
1793 xfclose (ofp, output_file);
1796 /* Merge into T the two sorted arrays of lines LO (with NLO members)
1797 and HI (with NHI members). T, LO, and HI point just past their
1798 respective arrays, and the arrays are in reverse order. NLO and
1799 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
1801 static inline void
1802 mergelines (struct line *t,
1803 struct line const *lo, size_t nlo,
1804 struct line const *hi, size_t nhi)
1806 for (;;)
1807 if (compare (lo - 1, hi - 1) <= 0)
1809 *--t = *--lo;
1810 if (! --nlo)
1812 /* HI - NHI equalled T - (NLO + NHI) when this function
1813 began. Therefore HI must equal T now, and there is no
1814 need to copy from HI to T. */
1815 return;
1818 else
1820 *--t = *--hi;
1821 if (! --nhi)
1824 *--t = *--lo;
1825 while (--nlo);
1827 return;
1832 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
1833 NLINES must be at least 2.
1834 The input and output arrays are in reverse order, and LINES and
1835 TEMP point just past the end of their respective arrays.
1837 Use a recursive divide-and-conquer algorithm, in the style
1838 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
1839 the optimization suggested by exercise 5.2.4-10; this requires room
1840 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
1841 writes that this memory optimization was originally published by
1842 D. A. Bell, Comp J. 1 (1958), 75. */
1844 static void
1845 sortlines (struct line *lines, size_t nlines, struct line *temp)
1847 if (nlines == 2)
1849 if (0 < compare (&lines[-1], &lines[-2]))
1851 struct line tmp = lines[-1];
1852 lines[-1] = lines[-2];
1853 lines[-2] = tmp;
1856 else
1858 size_t nlo = nlines / 2;
1859 size_t nhi = nlines - nlo;
1860 struct line *lo = lines;
1861 struct line *hi = lines - nlo;
1862 struct line *sorted_lo = temp;
1864 sortlines (hi, nhi, temp);
1865 if (1 < nlo)
1866 sortlines_temp (lo, nlo, sorted_lo);
1867 else
1868 sorted_lo[-1] = lo[-1];
1870 mergelines (lines, sorted_lo, nlo, hi, nhi);
1874 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
1875 rather than sorting in place. */
1877 static void
1878 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
1880 if (nlines == 2)
1882 bool swap = (0 < compare (&lines[-1], &lines[-2]));
1883 temp[-1] = lines[-1 - swap];
1884 temp[-2] = lines[-2 + swap];
1886 else
1888 size_t nlo = nlines / 2;
1889 size_t nhi = nlines - nlo;
1890 struct line *lo = lines;
1891 struct line *hi = lines - nlo;
1892 struct line *sorted_hi = temp - nlo;
1894 sortlines_temp (hi, nhi, sorted_hi);
1895 if (1 < nlo)
1896 sortlines (lo, nlo, temp);
1898 mergelines (temp, lo, nlo, sorted_hi, nhi);
1902 /* Return the index of the first of NFILES FILES that is the same file
1903 as OUTFILE. If none can be the same, return NFILES. Consider an
1904 input pipe to be the same as OUTFILE, since the pipe might be the
1905 output of a command like "cat OUTFILE". */
1907 static int
1908 first_same_file (char * const *files, int nfiles, char const *outfile)
1910 int i;
1911 bool got_outstat = false;
1912 struct stat instat, outstat;
1914 for (i = 0; i < nfiles; i++)
1916 bool standard_input = STREQ (files[i], "-");
1918 if (STREQ (outfile, files[i]) && ! standard_input)
1919 return i;
1921 if (! got_outstat)
1923 got_outstat = true;
1924 if ((STREQ (outfile, "-")
1925 ? fstat (STDOUT_FILENO, &outstat)
1926 : stat (outfile, &outstat))
1927 != 0)
1928 return nfiles;
1931 if (((standard_input
1932 ? fstat (STDIN_FILENO, &instat)
1933 : stat (files[i], &instat))
1934 == 0)
1935 && (S_ISFIFO (instat.st_mode) || SAME_INODE (instat, outstat)))
1936 return i;
1939 return nfiles;
1942 /* Merge NFILES FILES onto OUTPUT_FILE. However, merge at most
1943 MAX_MERGE input files directly onto OUTPUT_FILE. MAX_MERGE cannot
1944 exceed NMERGE. */
1946 static void
1947 merge (char **files, int nfiles, int max_merge, char const *output_file)
1949 while (max_merge < nfiles)
1951 FILE *tfp;
1952 int i, t = 0;
1953 char *temp;
1954 for (i = 0; i < nfiles / NMERGE; ++i)
1956 temp = create_temp_file (&tfp);
1957 mergefps (&files[i * NMERGE], NMERGE, tfp, temp);
1958 files[t++] = temp;
1960 temp = create_temp_file (&tfp);
1961 mergefps (&files[i * NMERGE], nfiles % NMERGE, tfp, temp);
1962 files[t++] = temp;
1963 nfiles = t;
1964 if (nfiles == 1)
1965 break;
1968 mergefps (files, nfiles, NULL, output_file);
1971 /* Sort NFILES FILES onto OUTPUT_FILE. */
1973 static void
1974 sort (char * const *files, int nfiles, char const *output_file)
1976 struct buffer buf;
1977 int n_temp_files = 0;
1978 bool output_file_created = false;
1980 buf.alloc = 0;
1982 while (nfiles)
1984 char const *temp_output;
1985 char const *file = *files;
1986 FILE *fp = xfopen (file, "r");
1987 FILE *tfp;
1988 size_t bytes_per_line = (2 * sizeof (struct line)
1989 - sizeof (struct line) / 2);
1991 if (! buf.alloc)
1992 initbuf (&buf, bytes_per_line,
1993 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
1994 buf.eof = false;
1995 files++;
1996 nfiles--;
1998 while (fillbuf (&buf, fp, file))
2000 struct line *line;
2001 struct line *linebase;
2003 if (buf.eof && nfiles
2004 && (bytes_per_line + 1
2005 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2007 /* End of file, but there is more input and buffer room.
2008 Concatenate the next input file; this is faster in
2009 the usual case. */
2010 buf.left = buf.used;
2011 break;
2014 line = buffer_linelim (&buf);
2015 linebase = line - buf.nlines;
2016 if (1 < buf.nlines)
2017 sortlines (line, buf.nlines, linebase);
2018 if (buf.eof && !nfiles && !n_temp_files && !buf.left)
2020 xfclose (fp, file);
2021 tfp = xfopen (output_file, "w");
2022 temp_output = output_file;
2023 output_file_created = true;
2025 else
2027 ++n_temp_files;
2028 temp_output = create_temp_file (&tfp);
2033 line--;
2034 write_bytes (line->text, line->length, tfp, temp_output);
2035 if (unique)
2036 while (linebase < line && compare (line, line - 1) == 0)
2037 line--;
2039 while (linebase < line);
2041 xfclose (tfp, temp_output);
2043 if (output_file_created)
2044 goto finish;
2046 xfclose (fp, file);
2049 finish:
2050 free (buf.buf);
2052 if (! output_file_created)
2054 int i = n_temp_files;
2055 struct tempnode *node;
2056 char **tempfiles = xnmalloc (n_temp_files, sizeof *tempfiles);
2057 for (node = temphead; i > 0; node = node->next)
2058 tempfiles[--i] = node->name;
2059 merge (tempfiles, n_temp_files, NMERGE, output_file);
2060 free (tempfiles);
2064 /* Insert key KEY at the end of the key list. */
2066 static void
2067 insertkey (struct keyfield *key)
2069 struct keyfield **p;
2071 for (p = &keylist; *p; p = &(*p)->next)
2072 continue;
2073 *p = key;
2074 key->next = NULL;
2077 /* Report a bad field specification SPEC, with extra info MSGID. */
2079 static void badfieldspec (char const *, char const *)
2080 ATTRIBUTE_NORETURN;
2081 static void
2082 badfieldspec (char const *spec, char const *msgid)
2084 error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"),
2085 _(msgid), spec);
2086 abort ();
2089 /* Parse the leading integer in STRING and store the resulting value
2090 (which must fit into size_t) into *VAL. Return the address of the
2091 suffix after the integer. If MSGID is NULL, return NULL after
2092 failure; otherwise, report MSGID and exit on failure. */
2094 static char const *
2095 parse_field_count (char const *string, size_t *val, char const *msgid)
2097 char *suffix;
2098 uintmax_t n;
2100 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2102 case LONGINT_OK:
2103 case LONGINT_INVALID_SUFFIX_CHAR:
2104 *val = n;
2105 if (*val == n)
2106 break;
2107 /* Fall through. */
2108 case LONGINT_OVERFLOW:
2109 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2110 if (msgid)
2111 error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2112 _(msgid), (int) (suffix - string), string);
2113 return NULL;
2115 case LONGINT_INVALID:
2116 if (msgid)
2117 error (SORT_FAILURE, 0, _("%s: invalid count at start of `%s'"),
2118 _(msgid), string);
2119 return NULL;
2122 return suffix;
2125 /* Handle interrupts and hangups. */
2127 static void
2128 sighandler (int sig)
2130 #ifndef SA_NOCLDSTOP
2131 signal (sig, SIG_IGN);
2132 #endif
2134 cleanup ();
2136 #ifdef SA_NOCLDSTOP
2138 struct sigaction sigact;
2140 sigact.sa_handler = SIG_DFL;
2141 sigemptyset (&sigact.sa_mask);
2142 sigact.sa_flags = 0;
2143 sigaction (sig, &sigact, NULL);
2145 #else
2146 signal (sig, SIG_DFL);
2147 #endif
2149 raise (sig);
2152 /* Set the ordering options for KEY specified in S.
2153 Return the address of the first character in S that
2154 is not a valid ordering option.
2155 BLANKTYPE is the kind of blanks that 'b' should skip. */
2157 static char *
2158 set_ordering (register const char *s, struct keyfield *key,
2159 enum blanktype blanktype)
2161 while (*s)
2163 switch (*s)
2165 case 'b':
2166 if (blanktype == bl_start || blanktype == bl_both)
2167 key->skipsblanks = true;
2168 if (blanktype == bl_end || blanktype == bl_both)
2169 key->skipeblanks = true;
2170 break;
2171 case 'd':
2172 key->ignore = nondictionary;
2173 break;
2174 case 'f':
2175 key->translate = fold_toupper;
2176 break;
2177 case 'g':
2178 key->general_numeric = true;
2179 break;
2180 case 'i':
2181 /* Option order should not matter, so don't let -i override
2182 -d. -d implies -i, but -i does not imply -d. */
2183 if (! key->ignore)
2184 key->ignore = nonprinting;
2185 break;
2186 case 'M':
2187 key->month = true;
2188 break;
2189 case 'n':
2190 key->numeric = true;
2191 break;
2192 case 'r':
2193 key->reverse = true;
2194 break;
2195 default:
2196 return (char *) s;
2198 ++s;
2200 return (char *) s;
2203 static struct keyfield *
2204 new_key (void)
2206 struct keyfield *key = xzalloc (sizeof *key);
2207 key->eword = SIZE_MAX;
2208 return key;
2212 main (int argc, char **argv)
2214 struct keyfield *key;
2215 struct keyfield gkey;
2216 char const *s;
2217 int c = 0;
2218 bool checkonly = false;
2219 bool mergeonly = false;
2220 int nfiles = 0;
2221 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2222 bool obsolete_usage = (posix2_version () < 200112);
2223 char const *short_options = (obsolete_usage
2224 ? COMMON_SHORT_OPTIONS "y::"
2225 : COMMON_SHORT_OPTIONS "y:");
2226 char *minus = "-", **files;
2227 char const *outfile = minus;
2228 static int const sigs[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2229 unsigned int nsigs = sizeof sigs / sizeof *sigs;
2230 #ifdef SA_NOCLDSTOP
2231 struct sigaction oldact, newact;
2232 #endif
2234 initialize_main (&argc, &argv);
2235 program_name = argv[0];
2236 setlocale (LC_ALL, "");
2237 bindtextdomain (PACKAGE, LOCALEDIR);
2238 textdomain (PACKAGE);
2240 atexit (cleanup);
2242 initialize_exit_failure (SORT_FAILURE);
2243 atexit (close_stdout);
2245 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2246 #if HAVE_NL_LANGINFO
2247 hard_LC_TIME = hard_locale (LC_TIME);
2248 #endif
2250 #if HAVE_SETLOCALE
2251 /* Let's get locale's representation of the decimal point */
2253 struct lconv const *lconvp = localeconv ();
2255 /* If the locale doesn't define a decimal point, or if the decimal
2256 point is multibyte, use the C decimal point. We don't support
2257 multibyte decimal points yet. */
2258 decimal_point = *lconvp->decimal_point;
2259 if (! decimal_point || lconvp->decimal_point[1])
2260 decimal_point = C_DECIMAL_POINT;
2262 /* We don't support multibyte thousands separators yet. */
2263 th_sep = *lconvp->thousands_sep;
2264 if (! th_sep || lconvp->thousands_sep[1])
2265 th_sep = CHAR_MAX + 1;
2267 #endif
2269 have_read_stdin = false;
2270 inittables ();
2272 #ifdef SA_NOCLDSTOP
2274 unsigned int i;
2275 sigemptyset (&caught_signals);
2276 for (i = 0; i < nsigs; i++)
2277 sigaddset (&caught_signals, sigs[i]);
2278 newact.sa_handler = sighandler;
2279 newact.sa_mask = caught_signals;
2280 newact.sa_flags = 0;
2282 #endif
2285 unsigned int i;
2286 for (i = 0; i < nsigs; i++)
2288 int sig = sigs[i];
2289 #ifdef SA_NOCLDSTOP
2290 sigaction (sig, NULL, &oldact);
2291 if (oldact.sa_handler != SIG_IGN)
2292 sigaction (sig, &newact, NULL);
2293 #else
2294 if (signal (sig, SIG_IGN) != SIG_IGN)
2295 signal (sig, sighandler);
2296 #endif
2300 gkey.sword = gkey.eword = SIZE_MAX;
2301 gkey.ignore = NULL;
2302 gkey.translate = NULL;
2303 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
2304 gkey.skipsblanks = gkey.skipeblanks = false;
2306 files = xnmalloc (argc, sizeof *files);
2308 for (;;)
2310 /* Parse an operand as a file after "--" was seen; or if
2311 pedantic and a file was seen, unless the POSIX version
2312 predates 1003.1-2001 and -c was not seen and the operand is
2313 "-o FILE" or "-oFILE". */
2315 if (c == -1
2316 || (posixly_correct && nfiles != 0
2317 && ! (obsolete_usage
2318 && ! checkonly
2319 && optind != argc
2320 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2321 && (argv[optind][2] || optind + 1 != argc)))
2322 || ((c = getopt_long (argc, argv, short_options,
2323 long_options, NULL))
2324 == -1))
2326 if (argc <= optind)
2327 break;
2328 files[nfiles++] = argv[optind++];
2330 else switch (c)
2332 case 1:
2333 key = NULL;
2334 if (obsolete_usage && optarg[0] == '+')
2336 /* Treat +POS1 [-POS2] as a key if possible; but silently
2337 treat an operand as a file if it is not a valid +POS1. */
2338 key = new_key ();
2339 s = parse_field_count (optarg + 1, &key->sword, NULL);
2340 if (s && *s == '.')
2341 s = parse_field_count (s + 1, &key->schar, NULL);
2342 if (! (key->sword | key->schar))
2343 key->sword = SIZE_MAX;
2344 if (! s || *set_ordering (s, key, bl_start))
2346 free (key);
2347 key = NULL;
2349 else
2351 if (optind != argc && argv[optind][0] == '-'
2352 && ISDIGIT (argv[optind][1]))
2354 char const *optarg1 = argv[optind++];
2355 s = parse_field_count (optarg1 + 1, &key->eword,
2356 N_("invalid number after `-'"));
2357 if (*s == '.')
2358 s = parse_field_count (s + 1, &key->echar,
2359 N_("invalid number after `.'"));
2360 if (*set_ordering (s, key, bl_end))
2361 badfieldspec (optarg1,
2362 N_("stray character in field spec"));
2364 insertkey (key);
2367 if (! key)
2368 files[nfiles++] = optarg;
2369 break;
2371 case 'b':
2372 case 'd':
2373 case 'f':
2374 case 'g':
2375 case 'i':
2376 case 'M':
2377 case 'n':
2378 case 'r':
2380 char str[2];
2381 str[0] = c;
2382 str[1] = '\0';
2383 set_ordering (str, &gkey, bl_both);
2385 break;
2387 case 'c':
2388 checkonly = true;
2389 break;
2391 case 'k':
2392 key = new_key ();
2394 /* Get POS1. */
2395 s = parse_field_count (optarg, &key->sword,
2396 N_("invalid number at field start"));
2397 if (! key->sword--)
2399 /* Provoke with `sort -k0' */
2400 badfieldspec (optarg, N_("field number is zero"));
2402 if (*s == '.')
2404 s = parse_field_count (s + 1, &key->schar,
2405 N_("invalid number after `.'"));
2406 if (! key->schar--)
2408 /* Provoke with `sort -k1.0' */
2409 badfieldspec (optarg, N_("character offset is zero"));
2412 if (! (key->sword | key->schar))
2413 key->sword = SIZE_MAX;
2414 s = set_ordering (s, key, bl_start);
2415 if (*s != ',')
2417 key->eword = SIZE_MAX;
2418 key->echar = 0;
2420 else
2422 /* Get POS2. */
2423 s = parse_field_count (s + 1, &key->eword,
2424 N_("invalid number after `,'"));
2425 if (! key->eword--)
2427 /* Provoke with `sort -k1,0' */
2428 badfieldspec (optarg, N_("field number is zero"));
2430 if (*s == '.')
2431 s = parse_field_count (s + 1, &key->echar,
2432 N_("invalid number after `.'"));
2433 else
2435 /* `-k 2,3' is equivalent to `+1 -3'. */
2436 key->eword++;
2438 s = set_ordering (s, key, bl_end);
2440 if (*s)
2441 badfieldspec (optarg, N_("stray character in field spec"));
2442 insertkey (key);
2443 break;
2445 case 'm':
2446 mergeonly = true;
2447 break;
2449 case 'o':
2450 if (outfile != minus && strcmp (outfile, optarg) != 0)
2451 error (SORT_FAILURE, 0, _("multiple output files specified"));
2452 outfile = optarg;
2453 break;
2455 case 's':
2456 stable = true;
2457 break;
2459 case 'S':
2460 specify_sort_size (optarg);
2461 break;
2463 case 't':
2465 int newtab = optarg[0];
2466 if (! newtab)
2467 error (SORT_FAILURE, 0, _("empty tab"));
2468 if (optarg[1])
2470 if (strcmp (optarg, "\\0") == 0)
2471 newtab = '\0';
2472 else
2474 /* Provoke with `sort -txx'. Complain about
2475 "multi-character tab" instead of "multibyte tab", so
2476 that the diagnostic's wording does not need to be
2477 changed once multibyte characters are supported. */
2478 error (SORT_FAILURE, 0, _("multi-character tab `%s'"),
2479 optarg);
2482 if (tab != TAB_DEFAULT && tab != newtab)
2483 error (SORT_FAILURE, 0, _("incompatible tabs"));
2484 tab = newtab;
2486 break;
2488 case 'T':
2489 add_temp_dir (optarg);
2490 break;
2492 case 'u':
2493 unique = true;
2494 break;
2496 case 'y':
2497 /* Accept and ignore e.g. -y0 for compatibility with Solaris
2498 2.x through Solaris 7. -y is marked as obsolete starting
2499 with Solaris 8. */
2500 break;
2502 case 'z':
2503 eolchar = 0;
2504 break;
2506 case_GETOPT_HELP_CHAR;
2508 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2510 default:
2511 usage (SORT_FAILURE);
2515 /* Inheritance of global options to individual keys. */
2516 for (key = keylist; key; key = key->next)
2517 if (! (key->ignore || key->translate
2518 || (key->skipsblanks | key->reverse
2519 | key->skipeblanks | key->month | key->numeric
2520 | key->general_numeric)))
2522 key->ignore = gkey.ignore;
2523 key->translate = gkey.translate;
2524 key->skipsblanks = gkey.skipsblanks;
2525 key->skipeblanks = gkey.skipeblanks;
2526 key->month = gkey.month;
2527 key->numeric = gkey.numeric;
2528 key->general_numeric = gkey.general_numeric;
2529 key->reverse = gkey.reverse;
2532 if (!keylist && (gkey.ignore || gkey.translate
2533 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
2534 | gkey.numeric | gkey.general_numeric)))
2535 insertkey (&gkey);
2536 reverse = gkey.reverse;
2538 if (temp_dir_count == 0)
2540 char const *tmp_dir = getenv ("TMPDIR");
2541 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
2544 if (nfiles == 0)
2546 nfiles = 1;
2547 files = &minus;
2550 if (checkonly)
2552 if (nfiles > 1)
2553 error (SORT_FAILURE, 0, _("extra operand `%s' not allowed with -c"),
2554 files[1]);
2556 /* POSIX requires that sort return 1 IFF invoked with -c and the
2557 input is not properly sorted. */
2558 exit (check (files[0]) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
2561 if (mergeonly)
2563 int max_merge = first_same_file (files, MIN (nfiles, NMERGE), outfile);
2564 merge (files, nfiles, max_merge, outfile);
2566 else
2567 sort (files, nfiles, outfile);
2569 if (have_read_stdin && fclose (stdin) == EOF)
2570 die (_("close failed"), "-");
2572 exit (EXIT_SUCCESS);