Sanitize environment.
[coreutils.git] / src / sort.c
blob61071ac61e2e15c0bb55e6eb5e576ebee3066440
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 1991-2004 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
24 #include <config.h>
26 #include <getopt.h>
27 #include <sys/types.h>
28 #include <signal.h>
29 #include <stdio.h>
30 #include "system.h"
31 #include "error.h"
32 #include "hard-locale.h"
33 #include "inttostr.h"
34 #include "physmem.h"
35 #include "posixver.h"
36 #include "quote.h"
37 #include "stdio-safer.h"
38 #include "xmemcoll.h"
39 #include "xstrtol.h"
41 #if HAVE_SYS_RESOURCE_H
42 # include <sys/resource.h>
43 #endif
44 #ifndef RLIMIT_DATA
45 struct rlimit { size_t rlim_cur; };
46 # define getrlimit(Resource, Rlp) (-1)
47 #endif
49 /* The official name of this program (e.g., no `g' prefix). */
50 #define PROGRAM_NAME "sort"
52 #define AUTHORS "Mike Haertel", "Paul Eggert"
54 #if HAVE_LANGINFO_CODESET
55 # include <langinfo.h>
56 #endif
58 #ifndef SA_NOCLDSTOP
59 # define sigprocmask(How, Set, Oset) /* empty */
60 # define sigset_t int
61 #endif
63 #ifndef STDC_HEADERS
64 double strtod ();
65 #endif
67 #define UCHAR_LIM (UCHAR_MAX + 1)
69 #ifndef DEFAULT_TMPDIR
70 # define DEFAULT_TMPDIR "/tmp"
71 #endif
73 /* Exit statuses. */
74 enum
76 /* POSIX says to exit with status 1 if invoked with -c and the
77 input is not properly sorted. */
78 SORT_OUT_OF_ORDER = 1,
80 /* POSIX says any other irregular exit must exit with a status
81 code greater than 1. */
82 SORT_FAILURE = 2
85 #define NEGATION_SIGN '-'
86 #define NUMERIC_ZERO '0'
88 /* The representation of the decimal point in the current locale. */
89 static char decimal_point;
91 /* Thousands separator; if CHAR_MAX + 1, then there isn't one. */
92 static int thousands_sep;
94 /* Nonzero if the corresponding locales are hard. */
95 static bool hard_LC_COLLATE;
96 #if HAVE_NL_LANGINFO
97 static bool hard_LC_TIME;
98 #endif
100 #define NONZERO(x) ((x) != 0)
102 /* The kind of blanks for '-b' to skip in various options. */
103 enum blanktype { bl_start, bl_end, bl_both };
105 /* The character marking end of line. Default to \n. */
106 static char eolchar = '\n';
108 /* Lines are held in core as counted strings. */
109 struct line
111 char *text; /* Text of the line. */
112 size_t length; /* Length including final newline. */
113 char *keybeg; /* Start of first key. */
114 char *keylim; /* Limit of first key. */
117 /* Input buffers. */
118 struct buffer
120 char *buf; /* Dynamically allocated buffer,
121 partitioned into 3 regions:
122 - input data;
123 - unused area;
124 - an array of lines, in reverse order. */
125 size_t used; /* Number of bytes used for input data. */
126 size_t nlines; /* Number of lines in the line array. */
127 size_t alloc; /* Number of bytes allocated. */
128 size_t left; /* Number of bytes left from previous reads. */
129 size_t line_bytes; /* Number of bytes to reserve for each line. */
130 bool eof; /* An EOF has been read. */
133 struct keyfield
135 size_t sword; /* Zero-origin 'word' to start at. */
136 size_t schar; /* Additional characters to skip. */
137 size_t eword; /* Zero-origin first word after field. */
138 size_t echar; /* Additional characters in field. */
139 bool const *ignore; /* Boolean array of characters to ignore. */
140 char const *translate; /* Translation applied to characters. */
141 bool skipsblanks; /* Skip leading blanks when finding start. */
142 bool skipeblanks; /* Skip leading blanks when finding end. */
143 bool numeric; /* Flag for numeric comparison. Handle
144 strings of digits with optional decimal
145 point, but no exponential notation. */
146 bool general_numeric; /* Flag for general, numeric comparison.
147 Handle numbers in exponential notation. */
148 bool month; /* Flag for comparison by month name. */
149 bool reverse; /* Reverse the sense of comparison. */
150 struct keyfield *next; /* Next keyfield to try. */
153 struct month
155 char const *name;
156 int val;
159 /* The name this program was run with. */
160 char *program_name;
162 /* FIXME: None of these tables work with multibyte character sets.
163 Also, there are many other bugs when handling multibyte characters.
164 One way to fix this is to rewrite `sort' to use wide characters
165 internally, but doing this with good performance is a bit
166 tricky. */
168 /* Table of blanks. */
169 static bool blanks[UCHAR_LIM];
171 /* Table of non-printing characters. */
172 static bool nonprinting[UCHAR_LIM];
174 /* Table of non-dictionary characters (not letters, digits, or blanks). */
175 static bool nondictionary[UCHAR_LIM];
177 /* Translation table folding lower case to upper. */
178 static char fold_toupper[UCHAR_LIM];
180 #define MONTHS_PER_YEAR 12
182 /* Table mapping month names to integers.
183 Alphabetic order allows binary search. */
184 static struct month monthtab[] =
186 {"APR", 4},
187 {"AUG", 8},
188 {"DEC", 12},
189 {"FEB", 2},
190 {"JAN", 1},
191 {"JUL", 7},
192 {"JUN", 6},
193 {"MAR", 3},
194 {"MAY", 5},
195 {"NOV", 11},
196 {"OCT", 10},
197 {"SEP", 9}
200 /* During the merge phase, the number of files to merge at once. */
201 #define NMERGE 16
203 /* Minimum size for a merge or check buffer. */
204 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
206 /* Minimum sort size; the code might not work with smaller sizes. */
207 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
209 /* The number of bytes needed for a merge or check buffer, which can
210 function relatively efficiently even if it holds only one line. If
211 a longer line is seen, this value is increased. */
212 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
214 /* The approximate maximum number of bytes of main memory to use, as
215 specified by the user. Zero if the user has not specified a size. */
216 static size_t sort_size;
218 /* The guessed size for non-regular files. */
219 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
221 /* Array of directory names in which any temporary files are to be created. */
222 static char const **temp_dirs;
224 /* Number of temporary directory names used. */
225 static size_t temp_dir_count;
227 /* Number of allocated slots in temp_dirs. */
228 static size_t temp_dir_alloc;
230 /* Flag to reverse the order of all comparisons. */
231 static bool reverse;
233 /* Flag for stable sort. This turns off the last ditch bytewise
234 comparison of lines, and instead leaves lines in the same order
235 they were read if all keys compare equal. */
236 static bool stable;
238 /* If TAB has this value, blanks separate fields. */
239 enum { TAB_DEFAULT = CHAR_MAX + 1 };
241 /* Tab character separating fields. If TAB_DEFAULT, then fields are
242 separated by the empty string between a non-blank character and a blank
243 character. */
244 static int tab = TAB_DEFAULT;
246 /* Flag to remove consecutive duplicate lines from the output.
247 Only the last of a sequence of equal lines will be output. */
248 static bool unique;
250 /* Nonzero if any of the input files are the standard input. */
251 static bool have_read_stdin;
253 /* List of key field comparisons to be tried. */
254 static struct keyfield *keylist;
256 static void sortlines_temp (struct line *, size_t, struct line *);
258 /* Report MESSAGE for FILE, then clean up and exit.
259 If FILE is null, it represents standard output. */
261 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
262 static void
263 die (char const *message, char const *file)
265 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
266 exit (SORT_FAILURE);
269 void
270 usage (int status)
272 if (status != EXIT_SUCCESS)
273 fprintf (stderr, _("Try `%s --help' for more information.\n"),
274 program_name);
275 else
277 printf (_("\
278 Usage: %s [OPTION]... [FILE]...\n\
280 program_name);
281 fputs (_("\
282 Write sorted concatenation of all FILE(s) to standard output.\n\
284 Ordering options:\n\
286 "), stdout);
287 fputs (_("\
288 Mandatory arguments to long options are mandatory for short options too.\n\
289 "), stdout);
290 fputs (_("\
291 -b, --ignore-leading-blanks ignore leading blanks\n\
292 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
293 -f, --ignore-case fold lower case to upper case characters\n\
294 "), stdout);
295 fputs (_("\
296 -g, --general-numeric-sort compare according to general numerical value\n\
297 -i, --ignore-nonprinting consider only printable characters\n\
298 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
299 -n, --numeric-sort compare according to string numerical value\n\
300 -r, --reverse reverse the result of comparisons\n\
302 "), stdout);
303 fputs (_("\
304 Other options:\n\
306 -c, --check check whether input is sorted; do not sort\n\
307 -k, --key=POS1[,POS2] start a key at POS1, end it at POS 2 (origin 1)\n\
308 -m, --merge merge already sorted files; do not sort\n\
309 -o, --output=FILE write result to FILE instead of standard output\n\
310 -s, --stable stabilize sort by disabling last-resort comparison\n\
311 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
312 "), stdout);
313 printf (_("\
314 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
315 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
316 multiple options specify multiple directories\n\
317 -u, --unique with -c, check for strict ordering;\n\
318 without -c, output only the first of an equal run\n\
319 "), DEFAULT_TMPDIR);
320 fputs (_("\
321 -z, --zero-terminated end lines with 0 byte, not newline\n\
322 "), stdout);
323 fputs (HELP_OPTION_DESCRIPTION, stdout);
324 fputs (VERSION_OPTION_DESCRIPTION, stdout);
325 fputs (_("\
327 POS is F[.C][OPTS], where F is the field number and C the character position\n\
328 in the field. OPTS is one or more single-letter ordering options, which\n\
329 override global ordering options for that key. If no key is given, use the\n\
330 entire line as the key.\n\
332 SIZE may be followed by the following multiplicative suffixes:\n\
333 "), stdout);
334 fputs (_("\
335 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
337 With no FILE, or when FILE is -, read standard input.\n\
339 *** WARNING ***\n\
340 The locale specified by the environment affects sort order.\n\
341 Set LC_ALL=C to get the traditional sort order that uses\n\
342 native byte values.\n\
343 "), stdout );
344 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
347 exit (status);
350 #define COMMON_SHORT_OPTIONS "-bcdfgik:mMno:rsS:t:T:uz"
352 static struct option const long_options[] =
354 {"ignore-leading-blanks", no_argument, NULL, 'b'},
355 {"check", no_argument, NULL, 'c'},
356 {"dictionary-order", no_argument, NULL, 'd'},
357 {"ignore-case", no_argument, NULL, 'f'},
358 {"general-numeric-sort", no_argument, NULL, 'g'},
359 {"ignore-nonprinting", no_argument, NULL, 'i'},
360 {"key", required_argument, NULL, 'k'},
361 {"merge", no_argument, NULL, 'm'},
362 {"month-sort", no_argument, NULL, 'M'},
363 {"numeric-sort", no_argument, NULL, 'n'},
364 {"output", required_argument, NULL, 'o'},
365 {"reverse", no_argument, NULL, 'r'},
366 {"stable", no_argument, NULL, 's'},
367 {"buffer-size", required_argument, NULL, 'S'},
368 {"field-separator", required_argument, NULL, 't'},
369 {"temporary-directory", required_argument, NULL, 'T'},
370 {"unique", no_argument, NULL, 'u'},
371 {"zero-terminated", no_argument, NULL, 'z'},
372 {GETOPT_HELP_OPTION_DECL},
373 {GETOPT_VERSION_OPTION_DECL},
374 {0, 0, 0, 0},
377 /* The set of signals that are caught. */
378 static sigset_t caught_signals;
380 /* The list of temporary files. */
381 struct tempnode
383 struct tempnode *volatile next;
384 char name[1]; /* Actual size is 1 + file name length. */
386 static struct tempnode *volatile temphead;
387 static struct tempnode *volatile *temptail = &temphead;
389 /* Clean up any remaining temporary files. */
391 static void
392 cleanup (void)
394 struct tempnode const *node;
396 for (node = temphead; node; node = node->next)
397 unlink (node->name);
400 /* Create a new temporary file, returning its newly allocated name.
401 Store into *PFP a stream open for writing. */
403 static char *
404 create_temp_file (FILE **pfp)
406 static char const slashbase[] = "/sortXXXXXX";
407 static size_t temp_dir_index;
408 sigset_t oldset;
409 int fd;
410 int saved_errno;
411 char const *temp_dir = temp_dirs[temp_dir_index];
412 size_t len = strlen (temp_dir);
413 struct tempnode *node =
414 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
415 char *file = node->name;
417 memcpy (file, temp_dir, len);
418 memcpy (file + len, slashbase, sizeof slashbase);
419 node->next = NULL;
420 if (++temp_dir_index == temp_dir_count)
421 temp_dir_index = 0;
423 /* Create the temporary file in a critical section, to avoid races. */
424 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
425 fd = mkstemp (file);
426 if (0 <= fd)
428 *temptail = node;
429 temptail = &node->next;
431 saved_errno = errno;
432 sigprocmask (SIG_SETMASK, &oldset, NULL);
433 errno = saved_errno;
435 if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
436 die (_("cannot create temporary file"), file);
438 return file;
441 /* Return a stream for FILE, opened with mode HOW. A null FILE means
442 standard output; HOW should be "w". When opening for input, "-"
443 means standard input. To avoid confusion, do not return file
444 descriptors 0, 1, or 2. */
446 static FILE *
447 xfopen (const char *file, const char *how)
449 FILE *fp;
451 if (!file)
452 fp = stdout;
453 else if (STREQ (file, "-") && *how == 'r')
455 have_read_stdin = true;
456 fp = stdin;
458 else
460 if ((fp = fopen_safer (file, how)) == NULL)
461 die (_("open failed"), file);
464 return fp;
467 /* Close FP, whose name is FILE, and report any errors. */
469 static void
470 xfclose (FILE *fp, char const *file)
472 if (fp == stdin)
474 /* Allow reading stdin from tty more than once. */
475 if (feof (fp))
476 clearerr (fp);
478 else if (fp == stdout)
480 /* Don't close stdout just yet. close_stdout does that. */
481 if (fflush (fp) != 0)
482 die (_("fflush failed"), file);
484 else
486 if (fclose (fp) != 0)
487 die (_("close failed"), file);
491 static void
492 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
494 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
495 die (_("write failed"), output_file);
498 /* Append DIR to the array of temporary directory names. */
499 static void
500 add_temp_dir (char const *dir)
502 if (temp_dir_count == temp_dir_alloc)
503 temp_dirs = x2nrealloc (temp_dirs, &temp_dir_alloc, sizeof *temp_dirs);
505 temp_dirs[temp_dir_count++] = dir;
508 /* Remove NAME from the list of temporary files. */
510 static void
511 zaptemp (const char *name)
513 struct tempnode *volatile *pnode;
514 struct tempnode *node;
515 struct tempnode *next;
516 sigset_t oldset;
517 int unlink_status;
518 int unlink_errno = 0;
520 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
521 continue;
523 /* Unlink the temporary file in a critical section to avoid races. */
524 next = node->next;
525 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
526 unlink_status = unlink (name);
527 unlink_errno = errno;
528 *pnode = next;
529 sigprocmask (SIG_SETMASK, &oldset, NULL);
531 if (unlink_status != 0)
532 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
533 if (! next)
534 temptail = pnode;
535 free (node);
538 #if HAVE_NL_LANGINFO
540 static int
541 struct_month_cmp (const void *m1, const void *m2)
543 struct month const *month1 = m1;
544 struct month const *month2 = m2;
545 return strcmp (month1->name, month2->name);
548 #endif
550 /* Initialize the character class tables. */
552 static void
553 inittables (void)
555 size_t i;
557 for (i = 0; i < UCHAR_LIM; ++i)
559 blanks[i] = !!ISBLANK (i);
560 nonprinting[i] = !ISPRINT (i);
561 nondictionary[i] = !ISALNUM (i) && !ISBLANK (i);
562 fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i);
565 #if HAVE_NL_LANGINFO
566 /* If we're not in the "C" locale, read different names for months. */
567 if (hard_LC_TIME)
569 for (i = 0; i < MONTHS_PER_YEAR; i++)
571 char const *s;
572 size_t s_len;
573 size_t j;
574 char *name;
576 s = (char *) nl_langinfo (ABMON_1 + i);
577 s_len = strlen (s);
578 monthtab[i].name = name = xmalloc (s_len + 1);
579 monthtab[i].val = i + 1;
581 for (j = 0; j < s_len; j++)
582 name[j] = fold_toupper[to_uchar (s[j])];
583 name[j] = '\0';
585 qsort ((void *) monthtab, MONTHS_PER_YEAR,
586 sizeof *monthtab, struct_month_cmp);
588 #endif
591 /* Specify the amount of main memory to use when sorting. */
592 static void
593 specify_sort_size (char const *s)
595 uintmax_t n;
596 char *suffix;
597 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
599 /* The default unit is KiB. */
600 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
602 if (n <= UINTMAX_MAX / 1024)
603 n *= 1024;
604 else
605 e = LONGINT_OVERFLOW;
608 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
609 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
610 switch (suffix[0])
612 case 'b':
613 e = LONGINT_OK;
614 break;
616 case '%':
618 double mem = physmem_total () * n / 100;
620 /* Use "<", not "<=", to avoid problems with rounding. */
621 if (mem < UINTMAX_MAX)
623 n = mem;
624 e = LONGINT_OK;
626 else
627 e = LONGINT_OVERFLOW;
629 break;
632 if (e == LONGINT_OK)
634 /* If multiple sort sizes are specified, take the maximum, so
635 that option order does not matter. */
636 if (n < sort_size)
637 return;
639 sort_size = n;
640 if (sort_size == n)
642 sort_size = MAX (sort_size, MIN_SORT_SIZE);
643 return;
646 e = LONGINT_OVERFLOW;
649 STRTOL_FATAL_ERROR (s, _("sort size"), e);
652 /* Return the default sort size. */
653 static size_t
654 default_sort_size (void)
656 /* Let MEM be available memory or 1/8 of total memory, whichever
657 is greater. */
658 double avail = physmem_available ();
659 double total = physmem_total ();
660 double mem = MAX (avail, total / 8);
661 struct rlimit rlimit;
663 /* Let SIZE be MEM, but no more than the maximum object size or
664 system resource limits. Avoid the MIN macro here, as it is not
665 quite right when only one argument is floating point. Don't
666 bother to check for values like RLIM_INFINITY since in practice
667 they are not much less than SIZE_MAX. */
668 size_t size = SIZE_MAX;
669 if (mem < size)
670 size = mem;
671 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
672 size = rlimit.rlim_cur;
673 #ifdef RLIMIT_AS
674 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
675 size = rlimit.rlim_cur;
676 #endif
678 /* Leave a large safety margin for the above limits, as failure can
679 occur when they are exceeded. */
680 size /= 2;
682 #ifdef RLIMIT_RSS
683 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
684 Exceeding RSS is not fatal, but can be quite slow. */
685 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
686 size = rlimit.rlim_cur / 16 * 15;
687 #endif
689 /* Use no less than the minimum. */
690 return MAX (size, MIN_SORT_SIZE);
693 /* Return the sort buffer size to use with the input files identified
694 by FPS and FILES, which are alternate paths to the same files.
695 NFILES gives the number of input files; NFPS may be less. Assume
696 that each input line requires LINE_BYTES extra bytes' worth of line
697 information. Do not exceed a bound on the size: if the bound is
698 not specified by the user, use a default. */
700 static size_t
701 sort_buffer_size (FILE *const *fps, size_t nfps,
702 char *const *files, size_t nfiles,
703 size_t line_bytes)
705 /* A bound on the input size. If zero, the bound hasn't been
706 determined yet. */
707 static size_t size_bound;
709 /* In the worst case, each input byte is a newline. */
710 size_t worst_case_per_input_byte = line_bytes + 1;
712 /* Keep enough room for one extra input line and an extra byte.
713 This extra room might be needed when preparing to read EOF. */
714 size_t size = worst_case_per_input_byte + 1;
716 size_t i;
718 for (i = 0; i < nfiles; i++)
720 struct stat st;
721 off_t file_size;
722 size_t worst_case;
724 if ((i < nfps ? fstat (fileno (fps[i]), &st)
725 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
726 : stat (files[i], &st))
727 != 0)
728 die (_("stat failed"), files[i]);
730 if (S_ISREG (st.st_mode))
731 file_size = st.st_size;
732 else
734 /* The file has unknown size. If the user specified a sort
735 buffer size, use that; otherwise, guess the size. */
736 if (sort_size)
737 return sort_size;
738 file_size = INPUT_FILE_SIZE_GUESS;
741 if (! size_bound)
743 size_bound = sort_size;
744 if (! size_bound)
745 size_bound = default_sort_size ();
748 /* Add the amount of memory needed to represent the worst case
749 where the input consists entirely of newlines followed by a
750 single non-newline. Check for overflow. */
751 worst_case = file_size * worst_case_per_input_byte + 1;
752 if (file_size != worst_case / worst_case_per_input_byte
753 || size_bound - size <= worst_case)
754 return size_bound;
755 size += worst_case;
758 return size;
761 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
762 must be at least sizeof (struct line). Allocate ALLOC bytes
763 initially. */
765 static void
766 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
768 /* Ensure that the line array is properly aligned. If the desired
769 size cannot be allocated, repeatedly halve it until allocation
770 succeeds. The smaller allocation may hurt overall performance,
771 but that's better than failing. */
772 for (;;)
774 alloc += sizeof (struct line) - alloc % sizeof (struct line);
775 buf->buf = malloc (alloc);
776 if (buf->buf)
777 break;
778 alloc /= 2;
779 if (alloc <= line_bytes + 1)
780 xalloc_die ();
783 buf->line_bytes = line_bytes;
784 buf->alloc = alloc;
785 buf->used = buf->left = buf->nlines = 0;
786 buf->eof = false;
789 /* Return one past the limit of the line array. */
791 static inline struct line *
792 buffer_linelim (struct buffer const *buf)
794 return (struct line *) (buf->buf + buf->alloc);
797 /* Return a pointer to the first character of the field specified
798 by KEY in LINE. */
800 static char *
801 begfield (const struct line *line, const struct keyfield *key)
803 register char *ptr = line->text, *lim = ptr + line->length - 1;
804 register size_t sword = key->sword;
805 register size_t schar = key->schar;
806 register size_t remaining_bytes;
808 /* The leading field separator itself is included in a field when -t
809 is absent. */
811 if (tab != TAB_DEFAULT)
812 while (ptr < lim && sword--)
814 while (ptr < lim && *ptr != tab)
815 ++ptr;
816 if (ptr < lim)
817 ++ptr;
819 else
820 while (ptr < lim && sword--)
822 while (ptr < lim && blanks[to_uchar (*ptr)])
823 ++ptr;
824 while (ptr < lim && !blanks[to_uchar (*ptr)])
825 ++ptr;
828 if (key->skipsblanks)
829 while (ptr < lim && blanks[to_uchar (*ptr)])
830 ++ptr;
832 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
833 remaining_bytes = lim - ptr;
834 if (schar < remaining_bytes)
835 ptr += schar;
836 else
837 ptr = lim;
839 return ptr;
842 /* Return the limit of (a pointer to the first character after) the field
843 in LINE specified by KEY. */
845 static char *
846 limfield (const struct line *line, const struct keyfield *key)
848 register char *ptr = line->text, *lim = ptr + line->length - 1;
849 register size_t eword = key->eword, echar = key->echar;
850 register size_t remaining_bytes;
852 /* Move PTR past EWORD fields or to one past the last byte on LINE,
853 whichever comes first. If there are more than EWORD fields, leave
854 PTR pointing at the beginning of the field having zero-based index,
855 EWORD. If a delimiter character was specified (via -t), then that
856 `beginning' is the first character following the delimiting TAB.
857 Otherwise, leave PTR pointing at the first `blank' character after
858 the preceding field. */
859 if (tab != TAB_DEFAULT)
860 while (ptr < lim && eword--)
862 while (ptr < lim && *ptr != tab)
863 ++ptr;
864 if (ptr < lim && (eword | echar))
865 ++ptr;
867 else
868 while (ptr < lim && eword--)
870 while (ptr < lim && blanks[to_uchar (*ptr)])
871 ++ptr;
872 while (ptr < lim && !blanks[to_uchar (*ptr)])
873 ++ptr;
876 #ifdef POSIX_UNSPECIFIED
877 /* The following block of code makes GNU sort incompatible with
878 standard Unix sort, so it's ifdef'd out for now.
879 The POSIX spec isn't clear on how to interpret this.
880 FIXME: request clarification.
882 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
883 Date: Thu, 30 May 96 12:20:41 -0400
884 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
886 [...]I believe I've found another bug in `sort'.
888 $ cat /tmp/sort.in
889 a b c 2 d
890 pq rs 1 t
891 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
892 a b c 2 d
893 pq rs 1 t
894 $ /bin/sort -k1.7,1.7 </tmp/sort.in
895 pq rs 1 t
896 a b c 2 d
898 Unix sort produced the answer I expected: sort on the single character
899 in column 7. GNU sort produced different results, because it disagrees
900 on the interpretation of the key-end spec "M.N". Unix sort reads this
901 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
902 "skip M-1 fields, then either N-1 characters or the rest of the current
903 field, whichever comes first". This extra clause applies only to
904 key-ends, not key-starts.
907 /* Make LIM point to the end of (one byte past) the current field. */
908 if (tab != TAB_DEFAULT)
910 char *newlim;
911 newlim = memchr (ptr, tab, lim - ptr);
912 if (newlim)
913 lim = newlim;
915 else
917 char *newlim;
918 newlim = ptr;
919 while (newlim < lim && blanks[to_uchar (*newlim)])
920 ++newlim;
921 while (newlim < lim && !blanks[to_uchar (*newlim)])
922 ++newlim;
923 lim = newlim;
925 #endif
927 /* If we're ignoring leading blanks when computing the End
928 of the field, don't start counting bytes until after skipping
929 past any leading blanks. */
930 if (key->skipeblanks)
931 while (ptr < lim && blanks[to_uchar (*ptr)])
932 ++ptr;
934 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
935 remaining_bytes = lim - ptr;
936 if (echar < remaining_bytes)
937 ptr += echar;
938 else
939 ptr = lim;
941 return ptr;
944 /* Fill BUF reading from FP, moving buf->left bytes from the end
945 of buf->buf to the beginning first. If EOF is reached and the
946 file wasn't terminated by a newline, supply one. Set up BUF's line
947 table too. FILE is the name of the file corresponding to FP.
948 Return true if some input was read. */
950 static bool
951 fillbuf (struct buffer *buf, register FILE *fp, char const *file)
953 struct keyfield const *key = keylist;
954 char eol = eolchar;
955 size_t line_bytes = buf->line_bytes;
956 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
958 if (buf->eof)
959 return false;
961 if (buf->used != buf->left)
963 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
964 buf->used = buf->left;
965 buf->nlines = 0;
968 for (;;)
970 char *ptr = buf->buf + buf->used;
971 struct line *linelim = buffer_linelim (buf);
972 struct line *line = linelim - buf->nlines;
973 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
974 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
976 while (line_bytes + 1 < avail)
978 /* Read as many bytes as possible, but do not read so many
979 bytes that there might not be enough room for the
980 corresponding line array. The worst case is when the
981 rest of the input file consists entirely of newlines,
982 except that the last byte is not a newline. */
983 size_t readsize = (avail - 1) / (line_bytes + 1);
984 size_t bytes_read = fread (ptr, 1, readsize, fp);
985 char *ptrlim = ptr + bytes_read;
986 char *p;
987 avail -= bytes_read;
989 if (bytes_read != readsize)
991 if (ferror (fp))
992 die (_("read failed"), file);
993 if (feof (fp))
995 buf->eof = true;
996 if (buf->buf == ptrlim)
997 return false;
998 if (ptrlim[-1] != eol)
999 *ptrlim++ = eol;
1003 /* Find and record each line in the just-read input. */
1004 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1006 ptr = p + 1;
1007 line--;
1008 line->text = line_start;
1009 line->length = ptr - line_start;
1010 mergesize = MAX (mergesize, line->length);
1011 avail -= line_bytes;
1013 if (key)
1015 /* Precompute the position of the first key for
1016 efficiency. */
1017 line->keylim = (key->eword == SIZE_MAX
1019 : limfield (line, key));
1021 if (key->sword != SIZE_MAX)
1022 line->keybeg = begfield (line, key);
1023 else
1025 if (key->skipsblanks)
1026 while (blanks[to_uchar (*line_start)])
1027 line_start++;
1028 line->keybeg = line_start;
1032 line_start = ptr;
1035 ptr = ptrlim;
1036 if (buf->eof)
1037 break;
1040 buf->used = ptr - buf->buf;
1041 buf->nlines = buffer_linelim (buf) - line;
1042 if (buf->nlines != 0)
1044 buf->left = ptr - line_start;
1045 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1046 return true;
1049 /* The current input line is too long to fit in the buffer.
1050 Double the buffer size and try again. */
1051 buf->buf = x2nrealloc (buf->buf, &buf->alloc, sizeof *(buf->buf));
1055 /* Compare strings A and B containing decimal fractions < 1. Each string
1056 should begin with a decimal point followed immediately by the digits
1057 of the fraction. Strings not of this form are considered to be zero. */
1059 /* The goal here, is to take two numbers a and b... compare these
1060 in parallel. Instead of converting each, and then comparing the
1061 outcome. Most likely stopping the comparison before the conversion
1062 is complete. The algorithm used, in the old sort:
1064 Algorithm: fraccompare
1065 Action : compare two decimal fractions
1066 accepts : char *a, char *b
1067 returns : -1 if a<b, 0 if a=b, 1 if a>b.
1068 implement:
1070 if *a == decimal_point AND *b == decimal_point
1071 find first character different in a and b.
1072 if both are digits, return the difference *a - *b.
1073 if *a is a digit
1074 skip past zeros
1075 if digit return 1, else 0
1076 if *b is a digit
1077 skip past zeros
1078 if digit return -1, else 0
1079 if *a is a decimal_point
1080 skip past decimal_point and zeros
1081 if digit return 1, else 0
1082 if *b is a decimal_point
1083 skip past decimal_point and zeros
1084 if digit return -1, else 0
1085 return 0 */
1087 static int
1088 fraccompare (register const char *a, register const char *b)
1090 if (*a == decimal_point && *b == decimal_point)
1092 while (*++a == *++b)
1093 if (! ISDIGIT (*a))
1094 return 0;
1095 if (ISDIGIT (*a) && ISDIGIT (*b))
1096 return *a - *b;
1097 if (ISDIGIT (*a))
1098 goto a_trailing_nonzero;
1099 if (ISDIGIT (*b))
1100 goto b_trailing_nonzero;
1101 return 0;
1103 else if (*a++ == decimal_point)
1105 a_trailing_nonzero:
1106 while (*a == NUMERIC_ZERO)
1107 a++;
1108 return ISDIGIT (*a);
1110 else if (*b++ == decimal_point)
1112 b_trailing_nonzero:
1113 while (*b == NUMERIC_ZERO)
1114 b++;
1115 return - ISDIGIT (*b);
1117 return 0;
1120 /* Compare strings A and B as numbers without explicitly converting them to
1121 machine numbers. Comparatively slow for short strings, but asymptotically
1122 hideously fast. */
1124 static int
1125 numcompare (register const char *a, register const char *b)
1127 char tmpa;
1128 char tmpb;
1129 int tmp;
1130 size_t log_a;
1131 size_t log_b;
1133 while (blanks[to_uchar (tmpa = *a)])
1134 a++;
1135 while (blanks[to_uchar (tmpb = *b)])
1136 b++;
1138 if (tmpa == NEGATION_SIGN)
1141 tmpa = *++a;
1142 while (tmpa == NUMERIC_ZERO || tmpa == thousands_sep);
1143 if (tmpb != NEGATION_SIGN)
1145 if (tmpa == decimal_point)
1147 tmpa = *++a;
1148 while (tmpa == NUMERIC_ZERO);
1149 if (ISDIGIT (tmpa))
1150 return -1;
1151 while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep)
1152 tmpb = *++b;
1153 if (tmpb == decimal_point)
1155 tmpb = *++b;
1156 while (tmpb == NUMERIC_ZERO);
1157 return - ISDIGIT (tmpb);
1160 tmpb = *++b;
1161 while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep);
1163 while (tmpa == tmpb && ISDIGIT (tmpa))
1166 tmpa = *++a;
1167 while (tmpa == thousands_sep);
1169 tmpb = *++b;
1170 while (tmpb == thousands_sep);
1173 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1174 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1175 return fraccompare (b, a);
1177 tmp = tmpb - tmpa;
1179 for (log_a = 0; ISDIGIT (tmpa); ++log_a)
1181 tmpa = *++a;
1182 while (tmpa == thousands_sep);
1184 for (log_b = 0; ISDIGIT (tmpb); ++log_b)
1186 tmpb = *++b;
1187 while (tmpb == thousands_sep);
1189 if (log_a != log_b)
1190 return log_a < log_b ? 1 : -1;
1192 if (!log_a)
1193 return 0;
1195 return tmp;
1197 else if (tmpb == NEGATION_SIGN)
1200 tmpb = *++b;
1201 while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep);
1202 if (tmpb == decimal_point)
1204 tmpb = *++b;
1205 while (tmpb == NUMERIC_ZERO);
1206 if (ISDIGIT (tmpb))
1207 return 1;
1208 while (tmpa == NUMERIC_ZERO || tmpa == thousands_sep)
1209 tmpa = *++a;
1210 if (tmpa == decimal_point)
1212 tmpa = *++a;
1213 while (tmpa == NUMERIC_ZERO);
1214 return ISDIGIT (tmpa);
1216 else
1218 while (tmpa == NUMERIC_ZERO || tmpa == thousands_sep)
1219 tmpa = *++a;
1220 while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep)
1221 tmpb = *++b;
1223 while (tmpa == tmpb && ISDIGIT (tmpa))
1226 tmpa = *++a;
1227 while (tmpa == thousands_sep);
1229 tmpb = *++b;
1230 while (tmpb == thousands_sep);
1233 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1234 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1235 return fraccompare (a, b);
1237 tmp = tmpa - tmpb;
1239 for (log_a = 0; ISDIGIT (tmpa); ++log_a)
1241 tmpa = *++a;
1242 while (tmpa == thousands_sep);
1244 for (log_b = 0; ISDIGIT (tmpb); ++log_b)
1246 tmpb = *++b;
1247 while (tmpb == thousands_sep);
1249 if (log_a != log_b)
1250 return log_a < log_b ? -1 : 1;
1252 if (!log_a)
1253 return 0;
1255 return tmp;
1259 static int
1260 general_numcompare (const char *sa, const char *sb)
1262 /* FIXME: add option to warn about failed conversions. */
1263 /* FIXME: maybe add option to try expensive FP conversion
1264 only if A and B can't be compared more cheaply/accurately. */
1266 char *ea;
1267 char *eb;
1268 double a = strtod (sa, &ea);
1269 double b = strtod (sb, &eb);
1271 /* Put conversion errors at the start of the collating sequence. */
1272 if (sa == ea)
1273 return sb == eb ? 0 : -1;
1274 if (sb == eb)
1275 return 1;
1277 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1278 conversion errors but before numbers; sort them by internal
1279 bit-pattern, for lack of a more portable alternative. */
1280 return (a < b ? -1
1281 : a > b ? 1
1282 : a == b ? 0
1283 : b == b ? -1
1284 : a == a ? 1
1285 : memcmp ((char *) &a, (char *) &b, sizeof a));
1288 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1289 Return 0 if the name in S is not recognized. */
1291 static int
1292 getmonth (char const *month, size_t len)
1294 size_t lo = 0;
1295 size_t hi = MONTHS_PER_YEAR;
1296 char const *monthlim = month + len;
1298 for (;;)
1300 if (month == monthlim)
1301 return 0;
1302 if (!blanks[to_uchar (*month)])
1303 break;
1304 ++month;
1309 size_t ix = (lo + hi) / 2;
1310 char const *m = month;
1311 char const *n = monthtab[ix].name;
1313 for (;; m++, n++)
1315 if (!*n)
1316 return monthtab[ix].val;
1317 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1319 hi = ix;
1320 break;
1322 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1324 lo = ix + 1;
1325 break;
1329 while (lo < hi);
1331 return 0;
1334 /* Compare two lines A and B trying every key in sequence until there
1335 are no more keys or a difference is found. */
1337 static int
1338 keycompare (const struct line *a, const struct line *b)
1340 struct keyfield const *key = keylist;
1342 /* For the first iteration only, the key positions have been
1343 precomputed for us. */
1344 register char *texta = a->keybeg;
1345 register char *textb = b->keybeg;
1346 register char *lima = a->keylim;
1347 register char *limb = b->keylim;
1349 int diff;
1351 for (;;)
1353 register char const *translate = key->translate;
1354 register bool const *ignore = key->ignore;
1356 /* Find the lengths. */
1357 size_t lena = lima <= texta ? 0 : lima - texta;
1358 size_t lenb = limb <= textb ? 0 : limb - textb;
1360 /* Actually compare the fields. */
1361 if (key->numeric | key->general_numeric)
1363 char savea = *lima, saveb = *limb;
1365 *lima = *limb = '\0';
1366 diff = ((key->numeric ? numcompare : general_numcompare)
1367 (texta, textb));
1368 *lima = savea, *limb = saveb;
1370 else if (key->month)
1371 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1372 /* Sorting like this may become slow, so in a simple locale the user
1373 can select a faster sort that is similar to ascii sort. */
1374 else if (hard_LC_COLLATE)
1376 if (ignore || translate)
1378 char buf[4000];
1379 size_t size = lena + 1 + lenb + 1;
1380 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1381 char *copy_b = copy_a + lena + 1;
1382 size_t new_len_a, new_len_b, i;
1384 /* Ignore and/or translate chars before comparing. */
1385 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1387 if (i < lena)
1389 copy_a[new_len_a] = (translate
1390 ? translate[to_uchar (texta[i])]
1391 : texta[i]);
1392 if (!ignore || !ignore[to_uchar (texta[i])])
1393 ++new_len_a;
1395 if (i < lenb)
1397 copy_b[new_len_b] = (translate
1398 ? translate[to_uchar (textb[i])]
1399 : textb [i]);
1400 if (!ignore || !ignore[to_uchar (textb[i])])
1401 ++new_len_b;
1405 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1407 if (sizeof buf < size)
1408 free (copy_a);
1410 else if (lena == 0)
1411 diff = - NONZERO (lenb);
1412 else if (lenb == 0)
1413 goto greater;
1414 else
1415 diff = xmemcoll (texta, lena, textb, lenb);
1417 else if (ignore)
1419 #define CMP_WITH_IGNORE(A, B) \
1420 do \
1422 for (;;) \
1424 while (texta < lima && ignore[to_uchar (*texta)]) \
1425 ++texta; \
1426 while (textb < limb && ignore[to_uchar (*textb)]) \
1427 ++textb; \
1428 if (! (texta < lima && textb < limb)) \
1429 break; \
1430 diff = to_uchar (A) - to_uchar (B); \
1431 if (diff) \
1432 goto not_equal; \
1433 ++texta; \
1434 ++textb; \
1437 diff = (texta < lima) - (textb < limb); \
1439 while (0)
1441 if (translate)
1442 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1443 translate[to_uchar (*textb)]);
1444 else
1445 CMP_WITH_IGNORE (*texta, *textb);
1447 else if (lena == 0)
1448 diff = - NONZERO (lenb);
1449 else if (lenb == 0)
1450 goto greater;
1451 else
1453 if (translate)
1455 while (texta < lima && textb < limb)
1457 diff = (to_uchar (translate[to_uchar (*texta++)])
1458 - to_uchar (translate[to_uchar (*textb++)]));
1459 if (diff)
1460 goto not_equal;
1463 else
1465 diff = memcmp (texta, textb, MIN (lena, lenb));
1466 if (diff)
1467 goto not_equal;
1469 diff = lena < lenb ? -1 : lena != lenb;
1472 if (diff)
1473 goto not_equal;
1475 key = key->next;
1476 if (! key)
1477 break;
1479 /* Find the beginning and limit of the next field. */
1480 if (key->eword != SIZE_MAX)
1481 lima = limfield (a, key), limb = limfield (b, key);
1482 else
1483 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1485 if (key->sword != SIZE_MAX)
1486 texta = begfield (a, key), textb = begfield (b, key);
1487 else
1489 texta = a->text, textb = b->text;
1490 if (key->skipsblanks)
1492 while (texta < lima && blanks[to_uchar (*texta)])
1493 ++texta;
1494 while (textb < limb && blanks[to_uchar (*textb)])
1495 ++textb;
1500 return 0;
1502 greater:
1503 diff = 1;
1504 not_equal:
1505 return key->reverse ? -diff : diff;
1508 /* Compare two lines A and B, returning negative, zero, or positive
1509 depending on whether A compares less than, equal to, or greater than B. */
1511 static int
1512 compare (register const struct line *a, register const struct line *b)
1514 int diff;
1515 size_t alen, blen;
1517 /* First try to compare on the specified keys (if any).
1518 The only two cases with no key at all are unadorned sort,
1519 and unadorned sort -r. */
1520 if (keylist)
1522 diff = keycompare (a, b);
1523 if (diff | unique | stable)
1524 return diff;
1527 /* If the keys all compare equal (or no keys were specified)
1528 fall through to the default comparison. */
1529 alen = a->length - 1, blen = b->length - 1;
1531 if (alen == 0)
1532 diff = - NONZERO (blen);
1533 else if (blen == 0)
1534 diff = 1;
1535 else if (hard_LC_COLLATE)
1536 diff = xmemcoll (a->text, alen, b->text, blen);
1537 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1538 diff = alen < blen ? -1 : alen != blen;
1540 return reverse ? -diff : diff;
1543 /* Check that the lines read from FILE_NAME come in order. Print a
1544 diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1545 false if they are not in order. Otherwise, print no diagnostic
1546 and return true. */
1548 static bool
1549 check (char const *file_name)
1551 FILE *fp = xfopen (file_name, "r");
1552 struct buffer buf; /* Input buffer. */
1553 struct line temp; /* Copy of previous line. */
1554 size_t alloc = 0;
1555 uintmax_t line_number = 0;
1556 struct keyfield const *key = keylist;
1557 bool nonunique = ! unique;
1558 bool ordered = true;
1560 initbuf (&buf, sizeof (struct line),
1561 MAX (merge_buffer_size, sort_size));
1562 temp.text = NULL;
1564 while (fillbuf (&buf, fp, file_name))
1566 struct line const *line = buffer_linelim (&buf);
1567 struct line const *linebase = line - buf.nlines;
1569 /* Make sure the line saved from the old buffer contents is
1570 less than or equal to the first line of the new buffer. */
1571 if (alloc && nonunique <= compare (&temp, line - 1))
1573 found_disorder:
1575 struct line const *disorder_line = line - 1;
1576 uintmax_t disorder_line_number =
1577 buffer_linelim (&buf) - disorder_line + line_number;
1578 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1579 fprintf (stderr, _("%s: %s:%s: disorder: "),
1580 program_name, file_name,
1581 umaxtostr (disorder_line_number, hr_buf));
1582 write_bytes (disorder_line->text, disorder_line->length, stderr,
1583 _("standard error"));
1584 ordered = false;
1585 break;
1589 /* Compare each line in the buffer with its successor. */
1590 while (linebase < --line)
1591 if (nonunique <= compare (line, line - 1))
1592 goto found_disorder;
1594 line_number += buf.nlines;
1596 /* Save the last line of the buffer. */
1597 if (alloc < line->length)
1601 alloc *= 2;
1602 if (! alloc)
1604 alloc = line->length;
1605 break;
1608 while (alloc < line->length);
1610 temp.text = xrealloc (temp.text, alloc);
1612 memcpy (temp.text, line->text, line->length);
1613 temp.length = line->length;
1614 if (key)
1616 temp.keybeg = temp.text + (line->keybeg - line->text);
1617 temp.keylim = temp.text + (line->keylim - line->text);
1621 xfclose (fp, file_name);
1622 free (buf.buf);
1623 if (temp.text)
1624 free (temp.text);
1625 return ordered;
1628 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
1629 files (all of which are at the start of the FILES array), and
1630 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
1631 Close input and output files before returning.
1632 OUTPUT_FILE gives the name of the output file. If it is NULL,
1633 the output file is standard output. If OFP is NULL, the output
1634 file has not been opened yet (or written to, if standard output). */
1636 static void
1637 mergefps (char **files, size_t ntemps, size_t nfiles,
1638 FILE *ofp, char const *output_file)
1640 FILE *fps[NMERGE]; /* Input streams for each file. */
1641 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1642 struct line saved; /* Saved line storage for unique check. */
1643 struct line const *savedline = NULL;
1644 /* &saved if there is a saved line. */
1645 size_t savealloc = 0; /* Size allocated for the saved line. */
1646 struct line const *cur[NMERGE]; /* Current line in each line table. */
1647 struct line const *base[NMERGE]; /* Base of each line table. */
1648 size_t ord[NMERGE]; /* Table representing a permutation of fps,
1649 such that cur[ord[0]] is the smallest line
1650 and will be next output. */
1651 size_t i;
1652 size_t j;
1653 size_t t;
1654 struct keyfield const *key = keylist;
1655 saved.text = NULL;
1657 /* Read initial lines from each input file. */
1658 for (i = 0; i < nfiles; )
1660 fps[i] = xfopen (files[i], "r");
1661 initbuf (&buffer[i], sizeof (struct line),
1662 MAX (merge_buffer_size, sort_size / nfiles));
1663 if (fillbuf (&buffer[i], fps[i], files[i]))
1665 struct line const *linelim = buffer_linelim (&buffer[i]);
1666 cur[i] = linelim - 1;
1667 base[i] = linelim - buffer[i].nlines;
1668 i++;
1670 else
1672 /* fps[i] is empty; eliminate it from future consideration. */
1673 xfclose (fps[i], files[i]);
1674 if (i < ntemps)
1676 ntemps--;
1677 zaptemp (files[i]);
1679 free (buffer[i].buf);
1680 --nfiles;
1681 for (j = i; j < nfiles; ++j)
1682 files[j] = files[j + 1];
1686 if (! ofp)
1687 ofp = xfopen (output_file, "w");
1689 /* Set up the ord table according to comparisons among input lines.
1690 Since this only reorders two items if one is strictly greater than
1691 the other, it is stable. */
1692 for (i = 0; i < nfiles; ++i)
1693 ord[i] = i;
1694 for (i = 1; i < nfiles; ++i)
1695 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
1696 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1698 /* Repeatedly output the smallest line until no input remains. */
1699 while (nfiles)
1701 struct line const *smallest = cur[ord[0]];
1703 /* If uniquified output is turned on, output only the first of
1704 an identical series of lines. */
1705 if (unique)
1707 if (savedline && compare (savedline, smallest))
1709 savedline = 0;
1710 write_bytes (saved.text, saved.length, ofp, output_file);
1712 if (!savedline)
1714 savedline = &saved;
1715 if (savealloc < smallest->length)
1718 if (! savealloc)
1720 savealloc = smallest->length;
1721 break;
1723 while ((savealloc *= 2) < smallest->length);
1725 saved.text = xrealloc (saved.text, savealloc);
1727 saved.length = smallest->length;
1728 memcpy (saved.text, smallest->text, saved.length);
1729 if (key)
1731 saved.keybeg =
1732 saved.text + (smallest->keybeg - smallest->text);
1733 saved.keylim =
1734 saved.text + (smallest->keylim - smallest->text);
1738 else
1739 write_bytes (smallest->text, smallest->length, ofp, output_file);
1741 /* Check if we need to read more lines into core. */
1742 if (base[ord[0]] < smallest)
1743 cur[ord[0]] = smallest - 1;
1744 else
1746 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
1748 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
1749 cur[ord[0]] = linelim - 1;
1750 base[ord[0]] = linelim - buffer[ord[0]].nlines;
1752 else
1754 /* We reached EOF on fps[ord[0]]. */
1755 for (i = 1; i < nfiles; ++i)
1756 if (ord[i] > ord[0])
1757 --ord[i];
1758 --nfiles;
1759 xfclose (fps[ord[0]], files[ord[0]]);
1760 if (ord[0] < ntemps)
1762 ntemps--;
1763 zaptemp (files[ord[0]]);
1765 free (buffer[ord[0]].buf);
1766 for (i = ord[0]; i < nfiles; ++i)
1768 fps[i] = fps[i + 1];
1769 files[i] = files[i + 1];
1770 buffer[i] = buffer[i + 1];
1771 cur[i] = cur[i + 1];
1772 base[i] = base[i + 1];
1774 for (i = 0; i < nfiles; ++i)
1775 ord[i] = ord[i + 1];
1776 continue;
1780 /* The new line just read in may be larger than other lines
1781 already in core; push it back in the queue until we encounter
1782 a line larger than it. */
1783 for (i = 1; i < nfiles; ++i)
1785 int cmp = compare (cur[ord[0]], cur[ord[i]]);
1786 if (cmp < 0 || (cmp == 0 && ord[0] < ord[i]))
1787 break;
1789 t = ord[0];
1790 for (j = 1; j < i; ++j)
1791 ord[j - 1] = ord[j];
1792 ord[i - 1] = t;
1795 if (unique && savedline)
1797 write_bytes (saved.text, saved.length, ofp, output_file);
1798 free (saved.text);
1801 xfclose (ofp, output_file);
1804 /* Merge into T the two sorted arrays of lines LO (with NLO members)
1805 and HI (with NHI members). T, LO, and HI point just past their
1806 respective arrays, and the arrays are in reverse order. NLO and
1807 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
1809 static inline void
1810 mergelines (struct line *t,
1811 struct line const *lo, size_t nlo,
1812 struct line const *hi, size_t nhi)
1814 for (;;)
1815 if (compare (lo - 1, hi - 1) <= 0)
1817 *--t = *--lo;
1818 if (! --nlo)
1820 /* HI - NHI equalled T - (NLO + NHI) when this function
1821 began. Therefore HI must equal T now, and there is no
1822 need to copy from HI to T. */
1823 return;
1826 else
1828 *--t = *--hi;
1829 if (! --nhi)
1832 *--t = *--lo;
1833 while (--nlo);
1835 return;
1840 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
1841 NLINES must be at least 2.
1842 The input and output arrays are in reverse order, and LINES and
1843 TEMP point just past the end of their respective arrays.
1845 Use a recursive divide-and-conquer algorithm, in the style
1846 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
1847 the optimization suggested by exercise 5.2.4-10; this requires room
1848 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
1849 writes that this memory optimization was originally published by
1850 D. A. Bell, Comp J. 1 (1958), 75. */
1852 static void
1853 sortlines (struct line *lines, size_t nlines, struct line *temp)
1855 if (nlines == 2)
1857 if (0 < compare (&lines[-1], &lines[-2]))
1859 struct line tmp = lines[-1];
1860 lines[-1] = lines[-2];
1861 lines[-2] = tmp;
1864 else
1866 size_t nlo = nlines / 2;
1867 size_t nhi = nlines - nlo;
1868 struct line *lo = lines;
1869 struct line *hi = lines - nlo;
1870 struct line *sorted_lo = temp;
1872 sortlines (hi, nhi, temp);
1873 if (1 < nlo)
1874 sortlines_temp (lo, nlo, sorted_lo);
1875 else
1876 sorted_lo[-1] = lo[-1];
1878 mergelines (lines, sorted_lo, nlo, hi, nhi);
1882 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
1883 rather than sorting in place. */
1885 static void
1886 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
1888 if (nlines == 2)
1890 bool swap = (0 < compare (&lines[-1], &lines[-2]));
1891 temp[-1] = lines[-1 - swap];
1892 temp[-2] = lines[-2 + swap];
1894 else
1896 size_t nlo = nlines / 2;
1897 size_t nhi = nlines - nlo;
1898 struct line *lo = lines;
1899 struct line *hi = lines - nlo;
1900 struct line *sorted_hi = temp - nlo;
1902 sortlines_temp (hi, nhi, sorted_hi);
1903 if (1 < nlo)
1904 sortlines (lo, nlo, temp);
1906 mergelines (temp, lo, nlo, sorted_hi, nhi);
1910 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
1911 the same as OUTFILE. If found, merge the found instances (and perhaps
1912 some other files) into a temporary file so that it can in turn be
1913 merged into OUTFILE without destroying OUTFILE before it is completely
1914 read. Return the new value of NFILES, which differs from the old if
1915 some merging occurred.
1917 This test ensures that an otherwise-erroneous use like
1918 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
1919 It's not clear that POSIX requires this nicety.
1920 Detect common error cases, but don't try to catch obscure cases like
1921 "cat ... FILE ... | sort -m -o FILE"
1922 where traditional "sort" doesn't copy the input and where
1923 people should know that they're getting into trouble anyway.
1924 Catching these obscure cases would slow down performance in
1925 common cases. */
1927 static size_t
1928 avoid_trashing_input (char **files, size_t ntemps, size_t nfiles,
1929 char const *outfile)
1931 size_t i;
1932 bool got_outstat = false;
1933 struct stat outstat;
1935 for (i = ntemps; i < nfiles; i++)
1937 bool standard_input = STREQ (files[i], "-");
1938 bool same;
1939 struct stat instat;
1941 if (outfile && STREQ (outfile, files[i]) && ! standard_input)
1942 same = true;
1943 else
1945 if (! got_outstat)
1947 if ((outfile
1948 ? stat (outfile, &outstat)
1949 : fstat (STDOUT_FILENO, &outstat))
1950 != 0)
1951 break;
1952 got_outstat = true;
1955 same = (((standard_input
1956 ? fstat (STDIN_FILENO, &instat)
1957 : stat (files[i], &instat))
1958 == 0)
1959 && SAME_INODE (instat, outstat));
1962 if (same)
1964 FILE *tftp;
1965 char *temp = create_temp_file (&tftp);
1966 mergefps (&files[i], 0, nfiles - i, tftp, temp);
1967 files[i] = temp;
1968 return i + 1;
1972 return nfiles;
1975 /* Merge the input FILES. NTEMPS is the number of files at the
1976 start of FILES that are temporary; it is zero at the top level.
1977 NFILES is the total number of files. Put the output in
1978 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
1980 static void
1981 merge (char **files, size_t ntemps, size_t nfiles, char const *output_file)
1983 while (NMERGE < nfiles)
1985 /* Number of input files processed so far. */
1986 size_t in;
1988 /* Number of output files generated so far. */
1989 size_t out;
1991 /* nfiles % NMERGE; this counts input files that are left over
1992 after all full-sized merges have been done. */
1993 size_t remainder;
1995 /* Number of easily-available slots at the next loop iteration. */
1996 size_t cheap_slots;
1998 /* Do as many NMERGE-size merges as possible. */
1999 for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
2001 FILE *tfp;
2002 char *temp = create_temp_file (&tfp);
2003 size_t nt = MIN (ntemps, NMERGE);
2004 ntemps -= nt;
2005 mergefps (&files[in], nt, NMERGE, tfp, temp);
2006 files[out] = temp;
2009 remainder = nfiles - in;
2010 cheap_slots = NMERGE - out % NMERGE;
2012 if (cheap_slots < remainder)
2014 /* So many files remain that they can't all be put into the last
2015 NMERGE-sized output window. Do one more merge. Merge as few
2016 files as possible, to avoid needless I/O. */
2017 size_t nshortmerge = remainder - cheap_slots + 1;
2018 FILE *tfp;
2019 char *temp = create_temp_file (&tfp);
2020 size_t nt = MIN (ntemps, nshortmerge);
2021 ntemps -= nt;
2022 mergefps (&files[in], nt, nshortmerge, tfp, temp);
2023 files[out++] = temp;
2024 in += nshortmerge;
2027 /* Put the remaining input files into the last NMERGE-sized output
2028 window, so they will be merged in the next pass. */
2029 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2030 ntemps += out;
2031 nfiles -= in - out;
2034 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2035 mergefps (files, ntemps, nfiles, NULL, output_file);
2038 /* Sort NFILES FILES onto OUTPUT_FILE. */
2040 static void
2041 sort (char * const *files, size_t nfiles, char const *output_file)
2043 struct buffer buf;
2044 size_t ntemps = 0;
2045 bool output_file_created = false;
2047 buf.alloc = 0;
2049 while (nfiles)
2051 char const *temp_output;
2052 char const *file = *files;
2053 FILE *fp = xfopen (file, "r");
2054 FILE *tfp;
2055 size_t bytes_per_line = (2 * sizeof (struct line)
2056 - sizeof (struct line) / 2);
2058 if (! buf.alloc)
2059 initbuf (&buf, bytes_per_line,
2060 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2061 buf.eof = false;
2062 files++;
2063 nfiles--;
2065 while (fillbuf (&buf, fp, file))
2067 struct line *line;
2068 struct line *linebase;
2070 if (buf.eof && nfiles
2071 && (bytes_per_line + 1
2072 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2074 /* End of file, but there is more input and buffer room.
2075 Concatenate the next input file; this is faster in
2076 the usual case. */
2077 buf.left = buf.used;
2078 break;
2081 line = buffer_linelim (&buf);
2082 linebase = line - buf.nlines;
2083 if (1 < buf.nlines)
2084 sortlines (line, buf.nlines, linebase);
2085 if (buf.eof && !nfiles && !ntemps && !buf.left)
2087 xfclose (fp, file);
2088 tfp = xfopen (output_file, "w");
2089 temp_output = output_file;
2090 output_file_created = true;
2092 else
2094 ++ntemps;
2095 temp_output = create_temp_file (&tfp);
2100 line--;
2101 write_bytes (line->text, line->length, tfp, temp_output);
2102 if (unique)
2103 while (linebase < line && compare (line, line - 1) == 0)
2104 line--;
2106 while (linebase < line);
2108 xfclose (tfp, temp_output);
2110 if (output_file_created)
2111 goto finish;
2113 xfclose (fp, file);
2116 finish:
2117 free (buf.buf);
2119 if (! output_file_created)
2121 size_t i;
2122 struct tempnode *node = temphead;
2123 char **tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2124 for (i = 0; node; i++)
2126 tempfiles[i] = node->name;
2127 node = node->next;
2129 merge (tempfiles, ntemps, ntemps, output_file);
2130 free (tempfiles);
2134 /* Insert key KEY at the end of the key list. */
2136 static void
2137 insertkey (struct keyfield *key)
2139 struct keyfield **p;
2141 for (p = &keylist; *p; p = &(*p)->next)
2142 continue;
2143 *p = key;
2144 key->next = NULL;
2147 /* Report a bad field specification SPEC, with extra info MSGID. */
2149 static void badfieldspec (char const *, char const *)
2150 ATTRIBUTE_NORETURN;
2151 static void
2152 badfieldspec (char const *spec, char const *msgid)
2154 error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"),
2155 _(msgid), spec);
2156 abort ();
2159 /* Parse the leading integer in STRING and store the resulting value
2160 (which must fit into size_t) into *VAL. Return the address of the
2161 suffix after the integer. If MSGID is NULL, return NULL after
2162 failure; otherwise, report MSGID and exit on failure. */
2164 static char const *
2165 parse_field_count (char const *string, size_t *val, char const *msgid)
2167 char *suffix;
2168 uintmax_t n;
2170 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2172 case LONGINT_OK:
2173 case LONGINT_INVALID_SUFFIX_CHAR:
2174 *val = n;
2175 if (*val == n)
2176 break;
2177 /* Fall through. */
2178 case LONGINT_OVERFLOW:
2179 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2180 if (msgid)
2181 error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2182 _(msgid), (int) (suffix - string), string);
2183 return NULL;
2185 case LONGINT_INVALID:
2186 if (msgid)
2187 error (SORT_FAILURE, 0, _("%s: invalid count at start of `%s'"),
2188 _(msgid), string);
2189 return NULL;
2192 return suffix;
2195 /* Handle interrupts and hangups. */
2197 static void
2198 sighandler (int sig)
2200 #ifndef SA_NOCLDSTOP
2201 signal (sig, SIG_IGN);
2202 #endif
2204 cleanup ();
2206 signal (sig, SIG_DFL);
2207 raise (sig);
2210 /* Set the ordering options for KEY specified in S.
2211 Return the address of the first character in S that
2212 is not a valid ordering option.
2213 BLANKTYPE is the kind of blanks that 'b' should skip. */
2215 static char *
2216 set_ordering (register const char *s, struct keyfield *key,
2217 enum blanktype blanktype)
2219 while (*s)
2221 switch (*s)
2223 case 'b':
2224 if (blanktype == bl_start || blanktype == bl_both)
2225 key->skipsblanks = true;
2226 if (blanktype == bl_end || blanktype == bl_both)
2227 key->skipeblanks = true;
2228 break;
2229 case 'd':
2230 key->ignore = nondictionary;
2231 break;
2232 case 'f':
2233 key->translate = fold_toupper;
2234 break;
2235 case 'g':
2236 key->general_numeric = true;
2237 break;
2238 case 'i':
2239 /* Option order should not matter, so don't let -i override
2240 -d. -d implies -i, but -i does not imply -d. */
2241 if (! key->ignore)
2242 key->ignore = nonprinting;
2243 break;
2244 case 'M':
2245 key->month = true;
2246 break;
2247 case 'n':
2248 key->numeric = true;
2249 break;
2250 case 'r':
2251 key->reverse = true;
2252 break;
2253 default:
2254 return (char *) s;
2256 ++s;
2258 return (char *) s;
2261 static struct keyfield *
2262 new_key (void)
2264 struct keyfield *key = xzalloc (sizeof *key);
2265 key->eword = SIZE_MAX;
2266 return key;
2270 main (int argc, char **argv)
2272 struct keyfield *key;
2273 struct keyfield gkey;
2274 char const *s;
2275 int c = 0;
2276 bool checkonly = false;
2277 bool mergeonly = false;
2278 size_t nfiles = 0;
2279 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2280 bool obsolete_usage = (posix2_version () < 200112);
2281 char const *short_options = (obsolete_usage
2282 ? COMMON_SHORT_OPTIONS "y::"
2283 : COMMON_SHORT_OPTIONS "y:");
2284 char *minus = "-", **files;
2285 char const *outfile = NULL;
2287 initialize_main (&argc, &argv);
2288 program_name = argv[0];
2289 setlocale (LC_ALL, "");
2290 bindtextdomain (PACKAGE, LOCALEDIR);
2291 textdomain (PACKAGE);
2293 atexit (cleanup);
2295 initialize_exit_failure (SORT_FAILURE);
2296 atexit (close_stdout);
2298 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2299 #if HAVE_NL_LANGINFO
2300 hard_LC_TIME = hard_locale (LC_TIME);
2301 #endif
2303 /* Get locale's representation of the decimal point. */
2305 struct lconv const *locale = localeconv ();
2307 /* If the locale doesn't define a decimal point, or if the decimal
2308 point is multibyte, use the C locale's decimal point. FIXME:
2309 add support for multibyte decimal points. */
2310 decimal_point = locale->decimal_point[0];
2311 if (! decimal_point || locale->decimal_point[1])
2312 decimal_point = '.';
2314 /* FIXME: add support for multibyte thousands separators. */
2315 thousands_sep = *locale->thousands_sep;
2316 if (! thousands_sep || locale->thousands_sep[1])
2317 thousands_sep = CHAR_MAX + 1;
2320 have_read_stdin = false;
2321 inittables ();
2324 size_t i;
2325 static int const sig[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2326 enum { nsigs = sizeof sig / sizeof sig[0] };
2328 #ifdef SA_NOCLDSTOP
2329 struct sigaction act;
2331 sigemptyset (&caught_signals);
2332 for (i = 0; i < nsigs; i++)
2334 sigaction (sig[i], NULL, &act);
2335 if (act.sa_handler != SIG_IGN)
2336 sigaddset (&caught_signals, sig[i]);
2339 act.sa_handler = sighandler;
2340 act.sa_mask = caught_signals;
2341 act.sa_flags = 0;
2343 for (i = 0; i < nsigs; i++)
2344 if (sigismember (&caught_signals, sig[i]))
2345 sigaction (sig[i], &act, NULL);
2346 #else
2347 for (i = 0; i < nsigs; i++)
2348 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2349 signal (sig[i], sighandler);
2350 #endif
2353 gkey.sword = gkey.eword = SIZE_MAX;
2354 gkey.ignore = NULL;
2355 gkey.translate = NULL;
2356 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
2357 gkey.skipsblanks = gkey.skipeblanks = false;
2359 files = xnmalloc (argc, sizeof *files);
2361 for (;;)
2363 /* Parse an operand as a file after "--" was seen; or if
2364 pedantic and a file was seen, unless the POSIX version
2365 predates 1003.1-2001 and -c was not seen and the operand is
2366 "-o FILE" or "-oFILE". */
2368 if (c == -1
2369 || (posixly_correct && nfiles != 0
2370 && ! (obsolete_usage
2371 && ! checkonly
2372 && optind != argc
2373 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2374 && (argv[optind][2] || optind + 1 != argc)))
2375 || ((c = getopt_long (argc, argv, short_options,
2376 long_options, NULL))
2377 == -1))
2379 if (argc <= optind)
2380 break;
2381 files[nfiles++] = argv[optind++];
2383 else switch (c)
2385 case 1:
2386 key = NULL;
2387 if (obsolete_usage && optarg[0] == '+')
2389 /* Treat +POS1 [-POS2] as a key if possible; but silently
2390 treat an operand as a file if it is not a valid +POS1. */
2391 key = new_key ();
2392 s = parse_field_count (optarg + 1, &key->sword, NULL);
2393 if (s && *s == '.')
2394 s = parse_field_count (s + 1, &key->schar, NULL);
2395 if (! (key->sword | key->schar))
2396 key->sword = SIZE_MAX;
2397 if (! s || *set_ordering (s, key, bl_start))
2399 free (key);
2400 key = NULL;
2402 else
2404 if (optind != argc && argv[optind][0] == '-'
2405 && ISDIGIT (argv[optind][1]))
2407 char const *optarg1 = argv[optind++];
2408 s = parse_field_count (optarg1 + 1, &key->eword,
2409 N_("invalid number after `-'"));
2410 if (*s == '.')
2411 s = parse_field_count (s + 1, &key->echar,
2412 N_("invalid number after `.'"));
2413 if (*set_ordering (s, key, bl_end))
2414 badfieldspec (optarg1,
2415 N_("stray character in field spec"));
2417 insertkey (key);
2420 if (! key)
2421 files[nfiles++] = optarg;
2422 break;
2424 case 'b':
2425 case 'd':
2426 case 'f':
2427 case 'g':
2428 case 'i':
2429 case 'M':
2430 case 'n':
2431 case 'r':
2433 char str[2];
2434 str[0] = c;
2435 str[1] = '\0';
2436 set_ordering (str, &gkey, bl_both);
2438 break;
2440 case 'c':
2441 checkonly = true;
2442 break;
2444 case 'k':
2445 key = new_key ();
2447 /* Get POS1. */
2448 s = parse_field_count (optarg, &key->sword,
2449 N_("invalid number at field start"));
2450 if (! key->sword--)
2452 /* Provoke with `sort -k0' */
2453 badfieldspec (optarg, N_("field number is zero"));
2455 if (*s == '.')
2457 s = parse_field_count (s + 1, &key->schar,
2458 N_("invalid number after `.'"));
2459 if (! key->schar--)
2461 /* Provoke with `sort -k1.0' */
2462 badfieldspec (optarg, N_("character offset is zero"));
2465 if (! (key->sword | key->schar))
2466 key->sword = SIZE_MAX;
2467 s = set_ordering (s, key, bl_start);
2468 if (*s != ',')
2470 key->eword = SIZE_MAX;
2471 key->echar = 0;
2473 else
2475 /* Get POS2. */
2476 s = parse_field_count (s + 1, &key->eword,
2477 N_("invalid number after `,'"));
2478 if (! key->eword--)
2480 /* Provoke with `sort -k1,0' */
2481 badfieldspec (optarg, N_("field number is zero"));
2483 if (*s == '.')
2484 s = parse_field_count (s + 1, &key->echar,
2485 N_("invalid number after `.'"));
2486 else
2488 /* `-k 2,3' is equivalent to `+1 -3'. */
2489 key->eword++;
2491 s = set_ordering (s, key, bl_end);
2493 if (*s)
2494 badfieldspec (optarg, N_("stray character in field spec"));
2495 insertkey (key);
2496 break;
2498 case 'm':
2499 mergeonly = true;
2500 break;
2502 case 'o':
2503 if (outfile && !STREQ (outfile, optarg))
2504 error (SORT_FAILURE, 0, _("multiple output files specified"));
2505 outfile = optarg;
2506 break;
2508 case 's':
2509 stable = true;
2510 break;
2512 case 'S':
2513 specify_sort_size (optarg);
2514 break;
2516 case 't':
2518 char newtab = optarg[0];
2519 if (! newtab)
2520 error (SORT_FAILURE, 0, _("empty tab"));
2521 if (optarg[1])
2523 if (STREQ (optarg, "\\0"))
2524 newtab = '\0';
2525 else
2527 /* Provoke with `sort -txx'. Complain about
2528 "multi-character tab" instead of "multibyte tab", so
2529 that the diagnostic's wording does not need to be
2530 changed once multibyte characters are supported. */
2531 error (SORT_FAILURE, 0, _("multi-character tab `%s'"),
2532 optarg);
2535 if (tab != TAB_DEFAULT && tab != newtab)
2536 error (SORT_FAILURE, 0, _("incompatible tabs"));
2537 tab = newtab;
2539 break;
2541 case 'T':
2542 add_temp_dir (optarg);
2543 break;
2545 case 'u':
2546 unique = true;
2547 break;
2549 case 'y':
2550 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
2551 through Solaris 7. It is also accepted by many non-Solaris
2552 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
2553 -y is marked as obsolete starting with Solaris 8 (1999), but is
2554 still accepted as of Solaris 10 prerelease (2004).
2556 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
2557 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
2558 and which in general ignores the argument after "-y" if it
2559 consists entirely of digits (it can even be empty). */
2560 if (!optarg && optind != argc)
2562 char const *p;
2563 for (p = argv[optind]; ISDIGIT (*p); p++)
2564 continue;
2565 optind += !*p;
2567 break;
2569 case 'z':
2570 eolchar = 0;
2571 break;
2573 case_GETOPT_HELP_CHAR;
2575 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2577 default:
2578 usage (SORT_FAILURE);
2582 /* Inheritance of global options to individual keys. */
2583 for (key = keylist; key; key = key->next)
2584 if (! (key->ignore || key->translate
2585 || (key->skipsblanks | key->reverse
2586 | key->skipeblanks | key->month | key->numeric
2587 | key->general_numeric)))
2589 key->ignore = gkey.ignore;
2590 key->translate = gkey.translate;
2591 key->skipsblanks = gkey.skipsblanks;
2592 key->skipeblanks = gkey.skipeblanks;
2593 key->month = gkey.month;
2594 key->numeric = gkey.numeric;
2595 key->general_numeric = gkey.general_numeric;
2596 key->reverse = gkey.reverse;
2599 if (!keylist && (gkey.ignore || gkey.translate
2600 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
2601 | gkey.numeric | gkey.general_numeric)))
2602 insertkey (&gkey);
2603 reverse = gkey.reverse;
2605 if (temp_dir_count == 0)
2607 char const *tmp_dir = getenv ("TMPDIR");
2608 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
2611 if (nfiles == 0)
2613 nfiles = 1;
2614 files = &minus;
2617 if (checkonly)
2619 if (nfiles > 1)
2621 error (0, 0, _("extra operand %s not allowed with -c"),
2622 quote (files[1]));
2623 usage (SORT_FAILURE);
2626 /* POSIX requires that sort return 1 IFF invoked with -c and the
2627 input is not properly sorted. */
2628 exit (check (files[0]) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
2631 if (mergeonly)
2632 merge (files, 0, nfiles, outfile);
2633 else
2634 sort (files, nfiles, outfile);
2636 if (have_read_stdin && fclose (stdin) == EOF)
2637 die (_("close failed"), "-");
2639 exit (EXIT_SUCCESS);