maint: replace each "for (;;)" with "while (true)"
[coreutils.git] / src / sort.c
blob41194ba22fb8b8ceca88917853fca6f3b2544424
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written December 1988 by Mike Haertel.
18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19 or (US mail) as Mike Haertel c/o Free Software Foundation.
21 Ørn E. Hansen added NLS support in 1997. */
23 #include <config.h>
25 #include <getopt.h>
26 #include <sys/types.h>
27 #include <sys/wait.h>
28 #include <signal.h>
29 #include "system.h"
30 #include "argmatch.h"
31 #include "error.h"
32 #include "filevercmp.h"
33 #include "hard-locale.h"
34 #include "hash.h"
35 #include "ignore-value.h"
36 #include "md5.h"
37 #include "mbswidth.h"
38 #include "physmem.h"
39 #include "posixver.h"
40 #include "quote.h"
41 #include "quotearg.h"
42 #include "randread.h"
43 #include "readtokens0.h"
44 #include "stdio--.h"
45 #include "stdlib--.h"
46 #include "strnumcmp.h"
47 #include "xmemcoll.h"
48 #include "xmemxfrm.h"
49 #include "xnanosleep.h"
50 #include "xstrtol.h"
52 #if HAVE_SYS_RESOURCE_H
53 # include <sys/resource.h>
54 #endif
55 #ifndef RLIMIT_DATA
56 struct rlimit { size_t rlim_cur; };
57 # define getrlimit(Resource, Rlp) (-1)
58 #endif
60 /* The official name of this program (e.g., no `g' prefix). */
61 #define PROGRAM_NAME "sort"
63 #define AUTHORS \
64 proper_name ("Mike Haertel"), \
65 proper_name ("Paul Eggert")
67 #if HAVE_LANGINFO_CODESET
68 # include <langinfo.h>
69 #endif
71 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
72 present. */
73 #ifndef SA_NOCLDSTOP
74 # define SA_NOCLDSTOP 0
75 /* No sigprocmask. Always 'return' zero. */
76 # define sigprocmask(How, Set, Oset) (0)
77 # define sigset_t int
78 # if ! HAVE_SIGINTERRUPT
79 # define siginterrupt(sig, flag) /* empty */
80 # endif
81 #endif
83 #if !defined OPEN_MAX && defined NR_OPEN
84 # define OPEN_MAX NR_OPEN
85 #endif
86 #if !defined OPEN_MAX
87 # define OPEN_MAX 20
88 #endif
90 #define UCHAR_LIM (UCHAR_MAX + 1)
92 #ifndef DEFAULT_TMPDIR
93 # define DEFAULT_TMPDIR "/tmp"
94 #endif
96 /* Exit statuses. */
97 enum
99 /* POSIX says to exit with status 1 if invoked with -c and the
100 input is not properly sorted. */
101 SORT_OUT_OF_ORDER = 1,
103 /* POSIX says any other irregular exit must exit with a status
104 code greater than 1. */
105 SORT_FAILURE = 2
108 enum
110 /* The number of times we should try to fork a compression process
111 (we retry if the fork call fails). We don't _need_ to compress
112 temp files, this is just to reduce disk access, so this number
113 can be small. Each retry doubles in duration. */
114 MAX_FORK_TRIES_COMPRESS = 4,
116 /* The number of times we should try to fork a decompression process.
117 If we can't fork a decompression process, we can't sort, so this
118 number should be big. Each retry doubles in duration. */
119 MAX_FORK_TRIES_DECOMPRESS = 9
122 /* The representation of the decimal point in the current locale. */
123 static int decimal_point;
125 /* Thousands separator; if -1, then there isn't one. */
126 static int thousands_sep;
128 /* Nonzero if the corresponding locales are hard. */
129 static bool hard_LC_COLLATE;
130 #if HAVE_NL_LANGINFO
131 static bool hard_LC_TIME;
132 #endif
134 #define NONZERO(x) ((x) != 0)
136 /* The kind of blanks for '-b' to skip in various options. */
137 enum blanktype { bl_start, bl_end, bl_both };
139 /* The character marking end of line. Default to \n. */
140 static char eolchar = '\n';
142 /* Lines are held in core as counted strings. */
143 struct line
145 char *text; /* Text of the line. */
146 size_t length; /* Length including final newline. */
147 char *keybeg; /* Start of first key. */
148 char *keylim; /* Limit of first key. */
151 /* Input buffers. */
152 struct buffer
154 char *buf; /* Dynamically allocated buffer,
155 partitioned into 3 regions:
156 - input data;
157 - unused area;
158 - an array of lines, in reverse order. */
159 size_t used; /* Number of bytes used for input data. */
160 size_t nlines; /* Number of lines in the line array. */
161 size_t alloc; /* Number of bytes allocated. */
162 size_t left; /* Number of bytes left from previous reads. */
163 size_t line_bytes; /* Number of bytes to reserve for each line. */
164 bool eof; /* An EOF has been read. */
167 struct keyfield
169 size_t sword; /* Zero-origin 'word' to start at. */
170 size_t schar; /* Additional characters to skip. */
171 size_t eword; /* Zero-origin last 'word' of key. */
172 size_t echar; /* Additional characters in field. */
173 bool const *ignore; /* Boolean array of characters to ignore. */
174 char const *translate; /* Translation applied to characters. */
175 bool skipsblanks; /* Skip leading blanks when finding start. */
176 bool skipeblanks; /* Skip leading blanks when finding end. */
177 bool numeric; /* Flag for numeric comparison. Handle
178 strings of digits with optional decimal
179 point, but no exponential notation. */
180 bool random; /* Sort by random hash of key. */
181 bool general_numeric; /* Flag for general, numeric comparison.
182 Handle numbers in exponential notation. */
183 bool human_numeric; /* Flag for sorting by human readable
184 units with either SI xor IEC prefixes. */
185 int iec_present; /* Flag for checking for mixed SI and IEC. */
186 bool month; /* Flag for comparison by month name. */
187 bool reverse; /* Reverse the sense of comparison. */
188 bool version; /* sort by version number */
189 bool obsolete_used; /* obsolescent key option format is used. */
190 struct keyfield *next; /* Next keyfield to try. */
193 struct month
195 char const *name;
196 int val;
199 /* FIXME: None of these tables work with multibyte character sets.
200 Also, there are many other bugs when handling multibyte characters.
201 One way to fix this is to rewrite `sort' to use wide characters
202 internally, but doing this with good performance is a bit
203 tricky. */
205 /* Table of blanks. */
206 static bool blanks[UCHAR_LIM];
208 /* Table of non-printing characters. */
209 static bool nonprinting[UCHAR_LIM];
211 /* Table of non-dictionary characters (not letters, digits, or blanks). */
212 static bool nondictionary[UCHAR_LIM];
214 /* Translation table folding lower case to upper. */
215 static unsigned char fold_toupper[UCHAR_LIM];
217 #define MONTHS_PER_YEAR 12
219 /* Table mapping month names to integers.
220 Alphabetic order allows binary search. */
221 static struct month monthtab[] =
223 {"APR", 4},
224 {"AUG", 8},
225 {"DEC", 12},
226 {"FEB", 2},
227 {"JAN", 1},
228 {"JUL", 7},
229 {"JUN", 6},
230 {"MAR", 3},
231 {"MAY", 5},
232 {"NOV", 11},
233 {"OCT", 10},
234 {"SEP", 9}
237 /* During the merge phase, the number of files to merge at once. */
238 #define NMERGE_DEFAULT 16
240 /* Minimum size for a merge or check buffer. */
241 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
243 /* Minimum sort size; the code might not work with smaller sizes. */
244 #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
246 /* The number of bytes needed for a merge or check buffer, which can
247 function relatively efficiently even if it holds only one line. If
248 a longer line is seen, this value is increased. */
249 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
251 /* The approximate maximum number of bytes of main memory to use, as
252 specified by the user. Zero if the user has not specified a size. */
253 static size_t sort_size;
255 /* The guessed size for non-regular files. */
256 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
258 /* Array of directory names in which any temporary files are to be created. */
259 static char const **temp_dirs;
261 /* Number of temporary directory names used. */
262 static size_t temp_dir_count;
264 /* Number of allocated slots in temp_dirs. */
265 static size_t temp_dir_alloc;
267 /* Flag to reverse the order of all comparisons. */
268 static bool reverse;
270 /* Flag for stable sort. This turns off the last ditch bytewise
271 comparison of lines, and instead leaves lines in the same order
272 they were read if all keys compare equal. */
273 static bool stable;
275 /* If TAB has this value, blanks separate fields. */
276 enum { TAB_DEFAULT = CHAR_MAX + 1 };
278 /* Tab character separating fields. If TAB_DEFAULT, then fields are
279 separated by the empty string between a non-blank character and a blank
280 character. */
281 static int tab = TAB_DEFAULT;
283 /* Flag to remove consecutive duplicate lines from the output.
284 Only the last of a sequence of equal lines will be output. */
285 static bool unique;
287 /* Nonzero if any of the input files are the standard input. */
288 static bool have_read_stdin;
290 /* List of key field comparisons to be tried. */
291 static struct keyfield *keylist;
293 /* Program used to (de)compress temp files. Must accept -d. */
294 static char const *compress_program;
296 /* Annotate the output with extra info to aid the user. */
297 static bool debug;
299 /* Maximum number of files to merge in one go. If more than this
300 number are present, temp files will be used. */
301 static unsigned int nmerge = NMERGE_DEFAULT;
303 static void sortlines_temp (struct line *, size_t, struct line *);
305 /* Report MESSAGE for FILE, then clean up and exit.
306 If FILE is null, it represents standard output. */
308 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
309 static void
310 die (char const *message, char const *file)
312 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
313 exit (SORT_FAILURE);
316 void
317 usage (int status)
319 if (status != EXIT_SUCCESS)
320 fprintf (stderr, _("Try `%s --help' for more information.\n"),
321 program_name);
322 else
324 printf (_("\
325 Usage: %s [OPTION]... [FILE]...\n\
326 or: %s [OPTION]... --files0-from=F\n\
328 program_name, program_name);
329 fputs (_("\
330 Write sorted concatenation of all FILE(s) to standard output.\n\
332 "), stdout);
333 fputs (_("\
334 Mandatory arguments to long options are mandatory for short options too.\n\
335 "), stdout);
336 fputs (_("\
337 Ordering options:\n\
339 "), stdout);
340 fputs (_("\
341 -b, --ignore-leading-blanks ignore leading blanks\n\
342 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
343 -f, --ignore-case fold lower case to upper case characters\n\
344 "), stdout);
345 fputs (_("\
346 -g, --general-numeric-sort compare according to general numerical value\n\
347 -i, --ignore-nonprinting consider only printable characters\n\
348 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
349 "), stdout);
350 fputs (_("\
351 -h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)\n\
352 "), stdout);
353 fputs (_("\
354 -n, --numeric-sort compare according to string numerical value\n\
355 -R, --random-sort sort by random hash of keys\n\
356 --random-source=FILE get random bytes from FILE\n\
357 -r, --reverse reverse the result of comparisons\n\
358 "), stdout);
359 fputs (_("\
360 --sort=WORD sort according to WORD:\n\
361 general-numeric -g, human-numeric -h, month -M,\n\
362 numeric -n, random -R, version -V\n\
363 -V, --version-sort natural sort of (version) numbers within text\n\
365 "), stdout);
366 fputs (_("\
367 Other options:\n\
369 "), stdout);
370 fputs (_("\
371 --batch-size=NMERGE merge at most NMERGE inputs at once;\n\
372 for more use temp files\n\
373 "), stdout);
374 fputs (_("\
375 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
376 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
377 --compress-program=PROG compress temporaries with PROG;\n\
378 decompress them with PROG -d\n\
379 "), stdout);
380 fputs (_("\
381 --debug annotate the part of the line used to sort,\n\
382 and warn about questionable usage to stderr\n\
383 --files0-from=F read input from the files specified by\n\
384 NUL-terminated names in file F;\n\
385 If F is - then read names from standard input\n\
386 "), stdout);
387 fputs (_("\
388 -k, --key=POS1[,POS2] start a key at POS1 (origin 1), end it at POS2\n\
389 (default end of line)\n\
390 -m, --merge merge already sorted files; do not sort\n\
391 "), stdout);
392 fputs (_("\
393 -o, --output=FILE write result to FILE instead of standard output\n\
394 -s, --stable stabilize sort by disabling last-resort comparison\n\
395 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
396 "), stdout);
397 printf (_("\
398 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
399 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
400 multiple options specify multiple directories\n\
401 -u, --unique with -c, check for strict ordering;\n\
402 without -c, output only the first of an equal run\n\
403 "), DEFAULT_TMPDIR);
404 fputs (_("\
405 -z, --zero-terminated end lines with 0 byte, not newline\n\
406 "), stdout);
407 fputs (HELP_OPTION_DESCRIPTION, stdout);
408 fputs (VERSION_OPTION_DESCRIPTION, stdout);
409 fputs (_("\
411 POS is F[.C][OPTS], where F is the field number and C the character position\n\
412 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
413 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
414 one or more single-letter ordering options, which override global ordering\n\
415 options for that key. If no key is given, use the entire line as the key.\n\
417 SIZE may be followed by the following multiplicative suffixes:\n\
418 "), stdout);
419 fputs (_("\
420 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
422 With no FILE, or when FILE is -, read standard input.\n\
424 *** WARNING ***\n\
425 The locale specified by the environment affects sort order.\n\
426 Set LC_ALL=C to get the traditional sort order that uses\n\
427 native byte values.\n\
428 "), stdout );
429 emit_ancillary_info ();
432 exit (status);
435 /* For long options that have no equivalent short option, use a
436 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
437 enum
439 CHECK_OPTION = CHAR_MAX + 1,
440 COMPRESS_PROGRAM_OPTION,
441 DEBUG_PROGRAM_OPTION,
442 FILES0_FROM_OPTION,
443 NMERGE_OPTION,
444 RANDOM_SOURCE_OPTION,
445 SORT_OPTION
448 static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";
450 static struct option const long_options[] =
452 {"ignore-leading-blanks", no_argument, NULL, 'b'},
453 {"check", optional_argument, NULL, CHECK_OPTION},
454 {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
455 {"debug", no_argument, NULL, DEBUG_PROGRAM_OPTION},
456 {"dictionary-order", no_argument, NULL, 'd'},
457 {"ignore-case", no_argument, NULL, 'f'},
458 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
459 {"general-numeric-sort", no_argument, NULL, 'g'},
460 {"ignore-nonprinting", no_argument, NULL, 'i'},
461 {"key", required_argument, NULL, 'k'},
462 {"merge", no_argument, NULL, 'm'},
463 {"month-sort", no_argument, NULL, 'M'},
464 {"numeric-sort", no_argument, NULL, 'n'},
465 {"human-numeric-sort", no_argument, NULL, 'h'},
466 {"version-sort", no_argument, NULL, 'V'},
467 {"random-sort", no_argument, NULL, 'R'},
468 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
469 {"sort", required_argument, NULL, SORT_OPTION},
470 {"output", required_argument, NULL, 'o'},
471 {"reverse", no_argument, NULL, 'r'},
472 {"stable", no_argument, NULL, 's'},
473 {"batch-size", required_argument, NULL, NMERGE_OPTION},
474 {"buffer-size", required_argument, NULL, 'S'},
475 {"field-separator", required_argument, NULL, 't'},
476 {"temporary-directory", required_argument, NULL, 'T'},
477 {"unique", no_argument, NULL, 'u'},
478 {"zero-terminated", no_argument, NULL, 'z'},
479 {GETOPT_HELP_OPTION_DECL},
480 {GETOPT_VERSION_OPTION_DECL},
481 {NULL, 0, NULL, 0},
484 #define CHECK_TABLE \
485 _ct_("quiet", 'C') \
486 _ct_("silent", 'C') \
487 _ct_("diagnose-first", 'c')
489 static char const *const check_args[] =
491 #define _ct_(_s, _c) _s,
492 CHECK_TABLE NULL
493 #undef _ct_
495 static char const check_types[] =
497 #define _ct_(_s, _c) _c,
498 CHECK_TABLE
499 #undef _ct_
502 #define SORT_TABLE \
503 _st_("general-numeric", 'g') \
504 _st_("human-numeric", 'h') \
505 _st_("month", 'M') \
506 _st_("numeric", 'n') \
507 _st_("random", 'R') \
508 _st_("version", 'V')
510 static char const *const sort_args[] =
512 #define _st_(_s, _c) _s,
513 SORT_TABLE NULL
514 #undef _st_
516 static char const sort_types[] =
518 #define _st_(_s, _c) _c,
519 SORT_TABLE
520 #undef _st_
523 /* The set of signals that are caught. */
524 static sigset_t caught_signals;
526 /* Critical section status. */
527 struct cs_status
529 bool valid;
530 sigset_t sigs;
533 /* Enter a critical section. */
534 static struct cs_status
535 cs_enter (void)
537 struct cs_status status;
538 status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
539 return status;
542 /* Leave a critical section. */
543 static void
544 cs_leave (struct cs_status status)
546 if (status.valid)
548 /* Ignore failure when restoring the signal mask. */
549 sigprocmask (SIG_SETMASK, &status.sigs, NULL);
553 /* The list of temporary files. */
554 struct tempnode
556 struct tempnode *volatile next;
557 pid_t pid; /* If compressed, the pid of compressor, else zero */
558 char name[1]; /* Actual size is 1 + file name length. */
560 static struct tempnode *volatile temphead;
561 static struct tempnode *volatile *temptail = &temphead;
563 struct sortfile
565 char const *name;
566 pid_t pid; /* If compressed, the pid of compressor, else zero */
569 /* A table where we store compression process states. We clean up all
570 processes in a timely manner so as not to exhaust system resources,
571 so we store the info on whether the process is still running, or has
572 been reaped here. */
573 static Hash_table *proctab;
575 enum { INIT_PROCTAB_SIZE = 47 };
577 enum procstate { ALIVE, ZOMBIE };
579 /* A proctab entry. The COUNT field is there in case we fork a new
580 compression process that has the same PID as an old zombie process
581 that is still in the table (because the process to decompress the
582 temp file it was associated with hasn't started yet). */
583 struct procnode
585 pid_t pid;
586 enum procstate state;
587 size_t count;
590 static size_t
591 proctab_hasher (const void *entry, size_t tabsize)
593 const struct procnode *node = entry;
594 return node->pid % tabsize;
597 static bool
598 proctab_comparator (const void *e1, const void *e2)
600 const struct procnode *n1 = e1, *n2 = e2;
601 return n1->pid == n2->pid;
604 /* The total number of forked processes (compressors and decompressors)
605 that have not been reaped yet. */
606 static size_t nprocs;
608 /* The number of child processes we'll allow before we try to reap some. */
609 enum { MAX_PROCS_BEFORE_REAP = 2 };
611 /* If 0 < PID, wait for the child process with that PID to exit.
612 If PID is -1, clean up a random child process which has finished and
613 return the process ID of that child. If PID is -1 and no processes
614 have quit yet, return 0 without waiting. */
616 static pid_t
617 reap (pid_t pid)
619 int status;
620 pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
622 if (cpid < 0)
623 error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
624 compress_program);
625 else if (0 < cpid)
627 if (! WIFEXITED (status) || WEXITSTATUS (status))
628 error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
629 compress_program);
630 --nprocs;
633 return cpid;
636 /* Add the PID of a running compression process to proctab, or update
637 the entry COUNT and STATE fields if it's already there. This also
638 creates the table for us the first time it's called. */
640 static void
641 register_proc (pid_t pid)
643 struct procnode test, *node;
645 if (! proctab)
647 proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
648 proctab_hasher,
649 proctab_comparator,
650 free);
651 if (! proctab)
652 xalloc_die ();
655 test.pid = pid;
656 node = hash_lookup (proctab, &test);
657 if (node)
659 node->state = ALIVE;
660 ++node->count;
662 else
664 node = xmalloc (sizeof *node);
665 node->pid = pid;
666 node->state = ALIVE;
667 node->count = 1;
668 if (hash_insert (proctab, node) == NULL)
669 xalloc_die ();
673 /* This is called when we reap a random process. We don't know
674 whether we have reaped a compression process or a decompression
675 process until we look in the table. If there's an ALIVE entry for
676 it, then we have reaped a compression process, so change the state
677 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
679 static void
680 update_proc (pid_t pid)
682 struct procnode test, *node;
684 test.pid = pid;
685 node = hash_lookup (proctab, &test);
686 if (node)
687 node->state = ZOMBIE;
690 /* This is for when we need to wait for a compression process to exit.
691 If it has a ZOMBIE entry in the table then it's already dead and has
692 been reaped. Note that if there's an ALIVE entry for it, it still may
693 already have died and been reaped if a second process was created with
694 the same PID. This is probably exceedingly rare, but to be on the safe
695 side we will have to wait for any compression process with this PID. */
697 static void
698 wait_proc (pid_t pid)
700 struct procnode test, *node;
702 test.pid = pid;
703 node = hash_lookup (proctab, &test);
704 if (node->state == ALIVE)
705 reap (pid);
707 node->state = ZOMBIE;
708 if (! --node->count)
710 hash_delete (proctab, node);
711 free (node);
715 /* Keep reaping finished children as long as there are more to reap.
716 This doesn't block waiting for any of them, it only reaps those
717 that are already dead. */
719 static void
720 reap_some (void)
722 pid_t pid;
724 while (0 < nprocs && (pid = reap (-1)))
725 update_proc (pid);
728 /* Clean up any remaining temporary files. */
730 static void
731 cleanup (void)
733 struct tempnode const *node;
735 for (node = temphead; node; node = node->next)
736 unlink (node->name);
737 temphead = NULL;
740 /* Cleanup actions to take when exiting. */
742 static void
743 exit_cleanup (void)
745 if (temphead)
747 /* Clean up any remaining temporary files in a critical section so
748 that a signal handler does not try to clean them too. */
749 struct cs_status cs = cs_enter ();
750 cleanup ();
751 cs_leave (cs);
754 close_stdout ();
757 /* Create a new temporary file, returning its newly allocated tempnode.
758 Store into *PFD the file descriptor open for writing.
759 If the creation fails, return NULL and store -1 into *PFD if the
760 failure is due to file descriptor exhaustion and
761 SURVIVE_FD_EXHAUSTION; otherwise, die. */
763 static struct tempnode *
764 create_temp_file (int *pfd, bool survive_fd_exhaustion)
766 static char const slashbase[] = "/sortXXXXXX";
767 static size_t temp_dir_index;
768 int fd;
769 int saved_errno;
770 char const *temp_dir = temp_dirs[temp_dir_index];
771 size_t len = strlen (temp_dir);
772 struct tempnode *node =
773 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
774 char *file = node->name;
775 struct cs_status cs;
777 memcpy (file, temp_dir, len);
778 memcpy (file + len, slashbase, sizeof slashbase);
779 node->next = NULL;
780 node->pid = 0;
781 if (++temp_dir_index == temp_dir_count)
782 temp_dir_index = 0;
784 /* Create the temporary file in a critical section, to avoid races. */
785 cs = cs_enter ();
786 fd = mkstemp (file);
787 if (0 <= fd)
789 *temptail = node;
790 temptail = &node->next;
792 saved_errno = errno;
793 cs_leave (cs);
794 errno = saved_errno;
796 if (fd < 0)
798 if (! (survive_fd_exhaustion && errno == EMFILE))
799 error (SORT_FAILURE, errno, _("cannot create temporary file in %s"),
800 quote (temp_dir));
801 free (node);
802 node = NULL;
805 *pfd = fd;
806 return node;
809 /* Predeclare an access pattern for input files.
810 Ignore any errors -- this is only advisory.
812 There are a few hints we could possibly provide,
813 and after careful testing it was decided that
814 specifying POSIX_FADV_SEQUENTIAL was not detrimental
815 to any cases. On Linux 2.6.31, this option doubles
816 the size of read ahead performed and thus was seen to
817 benefit these cases:
818 Merging
819 Sorting with a smaller internal buffer
820 Reading from faster flash devices
822 In _addition_ one could also specify other hints...
824 POSIX_FADV_WILLNEED was tested, but Linux 2.6.31
825 at least uses that to _synchronously_ prepopulate the cache
826 with the specified range. While sort does need to
827 read all of its input before outputting, a synchronous
828 read of the whole file up front precludes any processing
829 that sort could do in parallel with the system doing
830 read ahead of the data. This was seen to have negative effects
831 in a couple of cases:
832 Merging
833 Sorting with a smaller internal buffer
834 Note this option was seen to shorten the runtime for sort
835 on a multicore system with lots of RAM and other processes
836 competing for CPU. It could be argued that more explicit
837 scheduling hints with `nice` et. al. are more appropriate
838 for this situation.
840 POSIX_FADV_NOREUSE is a possibility as it could lower
841 the priority of input data in the cache as sort will
842 only need to process it once. However its functionality
843 has changed over Linux kernel versions and as of 2.6.31
844 it does nothing and thus we can't depend on what it might
845 do in future.
847 POSIX_FADV_DONTNEED is not appropriate for user specified
848 input files, but for temp files we do want to drop the
849 cache immediately after processing. This is done implicitly
850 however when the files are unlinked. */
852 static void
853 fadvise_input (FILE *fp)
855 #if HAVE_POSIX_FADVISE
856 if (fp)
858 int fd = fileno (fp);
859 ignore_value (posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL));
861 #endif
864 /* Return a stream for FILE, opened with mode HOW. A null FILE means
865 standard output; HOW should be "w". When opening for input, "-"
866 means standard input. To avoid confusion, do not return file
867 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
868 opening an ordinary FILE. Return NULL if unsuccessful. */
870 static FILE *
871 stream_open (const char *file, const char *how)
873 if (!file)
874 return stdout;
875 if (*how == 'r')
877 FILE *fp;
878 if (STREQ (file, "-"))
880 have_read_stdin = true;
881 fp = stdin;
883 else
884 fp = fopen (file, how);
885 fadvise_input (fp);
886 return fp;
888 return fopen (file, how);
891 /* Same as stream_open, except always return a non-null value; die on
892 failure. */
894 static FILE *
895 xfopen (const char *file, const char *how)
897 FILE *fp = stream_open (file, how);
898 if (!fp)
899 die (_("open failed"), file);
900 return fp;
903 /* Close FP, whose name is FILE, and report any errors. */
905 static void
906 xfclose (FILE *fp, char const *file)
908 switch (fileno (fp))
910 case STDIN_FILENO:
911 /* Allow reading stdin from tty more than once. */
912 if (feof (fp))
913 clearerr (fp);
914 break;
916 case STDOUT_FILENO:
917 /* Don't close stdout just yet. close_stdout does that. */
918 if (fflush (fp) != 0)
919 die (_("fflush failed"), file);
920 break;
922 default:
923 if (fclose (fp) != 0)
924 die (_("close failed"), file);
925 break;
929 static void
930 dup2_or_die (int oldfd, int newfd)
932 if (dup2 (oldfd, newfd) < 0)
933 error (SORT_FAILURE, errno, _("dup2 failed"));
936 /* Fork a child process for piping to and do common cleanup. The
937 TRIES parameter tells us how many times to try to fork before
938 giving up. Return the PID of the child, or -1 (setting errno)
939 on failure. */
941 static pid_t
942 pipe_fork (int pipefds[2], size_t tries)
944 #if HAVE_WORKING_FORK
945 struct tempnode *saved_temphead;
946 int saved_errno;
947 double wait_retry = 0.25;
948 pid_t pid IF_LINT (= -1);
949 struct cs_status cs;
951 if (pipe (pipefds) < 0)
952 return -1;
954 while (tries--)
956 /* This is so the child process won't delete our temp files
957 if it receives a signal before exec-ing. */
958 cs = cs_enter ();
959 saved_temphead = temphead;
960 temphead = NULL;
962 pid = fork ();
963 saved_errno = errno;
964 if (pid)
965 temphead = saved_temphead;
967 cs_leave (cs);
968 errno = saved_errno;
970 if (0 <= pid || errno != EAGAIN)
971 break;
972 else
974 xnanosleep (wait_retry);
975 wait_retry *= 2;
976 reap_some ();
980 if (pid < 0)
982 saved_errno = errno;
983 close (pipefds[0]);
984 close (pipefds[1]);
985 errno = saved_errno;
987 else if (pid == 0)
989 close (STDIN_FILENO);
990 close (STDOUT_FILENO);
992 else
993 ++nprocs;
995 return pid;
997 #else /* ! HAVE_WORKING_FORK */
998 return -1;
999 #endif
1002 /* Create a temporary file and start a compression program to filter output
1003 to that file. Set *PFP to the file handle and if PPID is non-NULL,
1004 set *PPID to the PID of the newly-created process. If the creation
1005 fails, return NULL if the failure is due to file descriptor
1006 exhaustion and SURVIVE_FD_EXHAUSTION; otherwise, die. */
1008 static char *
1009 maybe_create_temp (FILE **pfp, pid_t *ppid, bool survive_fd_exhaustion)
1011 int tempfd;
1012 struct tempnode *node = create_temp_file (&tempfd, survive_fd_exhaustion);
1013 char *name;
1015 if (! node)
1016 return NULL;
1018 name = node->name;
1020 if (compress_program)
1022 int pipefds[2];
1024 node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
1025 if (0 < node->pid)
1027 close (tempfd);
1028 close (pipefds[0]);
1029 tempfd = pipefds[1];
1031 register_proc (node->pid);
1033 else if (node->pid == 0)
1035 close (pipefds[1]);
1036 dup2_or_die (tempfd, STDOUT_FILENO);
1037 close (tempfd);
1038 dup2_or_die (pipefds[0], STDIN_FILENO);
1039 close (pipefds[0]);
1041 if (execlp (compress_program, compress_program, (char *) NULL) < 0)
1042 error (SORT_FAILURE, errno, _("couldn't execute %s"),
1043 compress_program);
1045 else
1046 node->pid = 0;
1049 *pfp = fdopen (tempfd, "w");
1050 if (! *pfp)
1051 die (_("couldn't create temporary file"), name);
1053 if (ppid)
1054 *ppid = node->pid;
1056 return name;
1059 /* Create a temporary file and start a compression program to filter output
1060 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
1061 set it to the PID of the newly-created process. Die on failure. */
1063 static char *
1064 create_temp (FILE **pfp, pid_t *ppid)
1066 return maybe_create_temp (pfp, ppid, false);
1069 /* Open a compressed temp file and start a decompression process through
1070 which to filter the input. PID must be the valid processes ID of the
1071 process used to compress the file. Return NULL (setting errno to
1072 EMFILE) if we ran out of file descriptors, and die on any other
1073 kind of failure. */
1075 static FILE *
1076 open_temp (const char *name, pid_t pid)
1078 int tempfd, pipefds[2];
1079 FILE *fp = NULL;
1081 wait_proc (pid);
1083 tempfd = open (name, O_RDONLY);
1084 if (tempfd < 0)
1085 return NULL;
1087 switch (pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS))
1089 case -1:
1090 if (errno != EMFILE)
1091 error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
1092 compress_program);
1093 close (tempfd);
1094 errno = EMFILE;
1095 break;
1097 case 0:
1098 close (pipefds[0]);
1099 dup2_or_die (tempfd, STDIN_FILENO);
1100 close (tempfd);
1101 dup2_or_die (pipefds[1], STDOUT_FILENO);
1102 close (pipefds[1]);
1104 execlp (compress_program, compress_program, "-d", (char *) NULL);
1105 error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
1106 compress_program);
1108 default:
1109 close (tempfd);
1110 close (pipefds[1]);
1112 fp = fdopen (pipefds[0], "r");
1113 if (! fp)
1115 int saved_errno = errno;
1116 close (pipefds[0]);
1117 errno = saved_errno;
1119 break;
1122 return fp;
1125 /* Append DIR to the array of temporary directory names. */
1126 static void
1127 add_temp_dir (char const *dir)
1129 if (temp_dir_count == temp_dir_alloc)
1130 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
1132 temp_dirs[temp_dir_count++] = dir;
1135 /* Remove NAME from the list of temporary files. */
1137 static void
1138 zaptemp (const char *name)
1140 struct tempnode *volatile *pnode;
1141 struct tempnode *node;
1142 struct tempnode *next;
1143 int unlink_status;
1144 int unlink_errno = 0;
1145 struct cs_status cs;
1147 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
1148 continue;
1150 /* Unlink the temporary file in a critical section to avoid races. */
1151 next = node->next;
1152 cs = cs_enter ();
1153 unlink_status = unlink (name);
1154 unlink_errno = errno;
1155 *pnode = next;
1156 cs_leave (cs);
1158 if (unlink_status != 0)
1159 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
1160 if (! next)
1161 temptail = pnode;
1162 free (node);
1165 #if HAVE_NL_LANGINFO
1167 static int
1168 struct_month_cmp (const void *m1, const void *m2)
1170 struct month const *month1 = m1;
1171 struct month const *month2 = m2;
1172 return strcmp (month1->name, month2->name);
1175 #endif
1177 /* Initialize the character class tables. */
1179 static void
1180 inittables (void)
1182 size_t i;
1184 for (i = 0; i < UCHAR_LIM; ++i)
1186 blanks[i] = !! isblank (i);
1187 nonprinting[i] = ! isprint (i);
1188 nondictionary[i] = ! isalnum (i) && ! isblank (i);
1189 fold_toupper[i] = toupper (i);
1192 #if HAVE_NL_LANGINFO
1193 /* If we're not in the "C" locale, read different names for months. */
1194 if (hard_LC_TIME)
1196 for (i = 0; i < MONTHS_PER_YEAR; i++)
1198 char const *s;
1199 size_t s_len;
1200 size_t j, k;
1201 char *name;
1203 s = (char *) nl_langinfo (ABMON_1 + i);
1204 s_len = strlen (s);
1205 monthtab[i].name = name = xmalloc (s_len + 1);
1206 monthtab[i].val = i + 1;
1208 for (j = k = 0; j < s_len; j++)
1209 if (! isblank (to_uchar (s[j])))
1210 name[k++] = fold_toupper[to_uchar (s[j])];
1211 name[k] = '\0';
1213 qsort ((void *) monthtab, MONTHS_PER_YEAR,
1214 sizeof *monthtab, struct_month_cmp);
1216 #endif
1219 /* Specify how many inputs may be merged at once.
1220 This may be set on the command-line with the
1221 --batch-size option. */
1222 static void
1223 specify_nmerge (int oi, char c, char const *s)
1225 uintmax_t n;
1226 struct rlimit rlimit;
1227 enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL);
1229 /* Try to find out how many file descriptors we'll be able
1230 to open. We need at least nmerge + 3 (STDIN_FILENO,
1231 STDOUT_FILENO and STDERR_FILENO). */
1232 unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0
1233 ? rlimit.rlim_cur
1234 : OPEN_MAX)
1235 - 3);
1237 if (e == LONGINT_OK)
1239 nmerge = n;
1240 if (nmerge != n)
1241 e = LONGINT_OVERFLOW;
1242 else
1244 if (nmerge < 2)
1246 error (0, 0, _("invalid --%s argument %s"),
1247 long_options[oi].name, quote (s));
1248 error (SORT_FAILURE, 0,
1249 _("minimum --%s argument is %s"),
1250 long_options[oi].name, quote ("2"));
1252 else if (max_nmerge < nmerge)
1254 e = LONGINT_OVERFLOW;
1256 else
1257 return;
1261 if (e == LONGINT_OVERFLOW)
1263 char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)];
1264 error (0, 0, _("--%s argument %s too large"),
1265 long_options[oi].name, quote (s));
1266 error (SORT_FAILURE, 0,
1267 _("maximum --%s argument with current rlimit is %s"),
1268 long_options[oi].name,
1269 uinttostr (max_nmerge, max_nmerge_buf));
1271 else
1272 xstrtol_fatal (e, oi, c, long_options, s);
1275 /* Specify the amount of main memory to use when sorting. */
1276 static void
1277 specify_sort_size (int oi, char c, char const *s)
1279 uintmax_t n;
1280 char *suffix;
1281 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1283 /* The default unit is KiB. */
1284 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1286 if (n <= UINTMAX_MAX / 1024)
1287 n *= 1024;
1288 else
1289 e = LONGINT_OVERFLOW;
1292 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1293 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1294 switch (suffix[0])
1296 case 'b':
1297 e = LONGINT_OK;
1298 break;
1300 case '%':
1302 double mem = physmem_total () * n / 100;
1304 /* Use "<", not "<=", to avoid problems with rounding. */
1305 if (mem < UINTMAX_MAX)
1307 n = mem;
1308 e = LONGINT_OK;
1310 else
1311 e = LONGINT_OVERFLOW;
1313 break;
1316 if (e == LONGINT_OK)
1318 /* If multiple sort sizes are specified, take the maximum, so
1319 that option order does not matter. */
1320 if (n < sort_size)
1321 return;
1323 sort_size = n;
1324 if (sort_size == n)
1326 sort_size = MAX (sort_size, MIN_SORT_SIZE);
1327 return;
1330 e = LONGINT_OVERFLOW;
1333 xstrtol_fatal (e, oi, c, long_options, s);
1336 /* Return the default sort size. */
1337 static size_t
1338 default_sort_size (void)
1340 /* Let MEM be available memory or 1/8 of total memory, whichever
1341 is greater. */
1342 double avail = physmem_available ();
1343 double total = physmem_total ();
1344 double mem = MAX (avail, total / 8);
1345 struct rlimit rlimit;
1347 /* Let SIZE be MEM, but no more than the maximum object size or
1348 system resource limits. Avoid the MIN macro here, as it is not
1349 quite right when only one argument is floating point. Don't
1350 bother to check for values like RLIM_INFINITY since in practice
1351 they are not much less than SIZE_MAX. */
1352 size_t size = SIZE_MAX;
1353 if (mem < size)
1354 size = mem;
1355 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1356 size = rlimit.rlim_cur;
1357 #ifdef RLIMIT_AS
1358 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1359 size = rlimit.rlim_cur;
1360 #endif
1362 /* Leave a large safety margin for the above limits, as failure can
1363 occur when they are exceeded. */
1364 size /= 2;
1366 #ifdef RLIMIT_RSS
1367 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1368 Exceeding RSS is not fatal, but can be quite slow. */
1369 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1370 size = rlimit.rlim_cur / 16 * 15;
1371 #endif
1373 /* Use no less than the minimum. */
1374 return MAX (size, MIN_SORT_SIZE);
1377 /* Return the sort buffer size to use with the input files identified
1378 by FPS and FILES, which are alternate names of the same files.
1379 NFILES gives the number of input files; NFPS may be less. Assume
1380 that each input line requires LINE_BYTES extra bytes' worth of line
1381 information. Do not exceed the size bound specified by the user
1382 (or a default size bound, if the user does not specify one). */
1384 static size_t
1385 sort_buffer_size (FILE *const *fps, size_t nfps,
1386 char *const *files, size_t nfiles,
1387 size_t line_bytes)
1389 /* A bound on the input size. If zero, the bound hasn't been
1390 determined yet. */
1391 static size_t size_bound;
1393 /* In the worst case, each input byte is a newline. */
1394 size_t worst_case_per_input_byte = line_bytes + 1;
1396 /* Keep enough room for one extra input line and an extra byte.
1397 This extra room might be needed when preparing to read EOF. */
1398 size_t size = worst_case_per_input_byte + 1;
1400 size_t i;
1402 for (i = 0; i < nfiles; i++)
1404 struct stat st;
1405 off_t file_size;
1406 size_t worst_case;
1408 if ((i < nfps ? fstat (fileno (fps[i]), &st)
1409 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1410 : stat (files[i], &st))
1411 != 0)
1412 die (_("stat failed"), files[i]);
1414 if (S_ISREG (st.st_mode))
1415 file_size = st.st_size;
1416 else
1418 /* The file has unknown size. If the user specified a sort
1419 buffer size, use that; otherwise, guess the size. */
1420 if (sort_size)
1421 return sort_size;
1422 file_size = INPUT_FILE_SIZE_GUESS;
1425 if (! size_bound)
1427 size_bound = sort_size;
1428 if (! size_bound)
1429 size_bound = default_sort_size ();
1432 /* Add the amount of memory needed to represent the worst case
1433 where the input consists entirely of newlines followed by a
1434 single non-newline. Check for overflow. */
1435 worst_case = file_size * worst_case_per_input_byte + 1;
1436 if (file_size != worst_case / worst_case_per_input_byte
1437 || size_bound - size <= worst_case)
1438 return size_bound;
1439 size += worst_case;
1442 return size;
1445 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1446 must be at least sizeof (struct line). Allocate ALLOC bytes
1447 initially. */
1449 static void
1450 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1452 /* Ensure that the line array is properly aligned. If the desired
1453 size cannot be allocated, repeatedly halve it until allocation
1454 succeeds. The smaller allocation may hurt overall performance,
1455 but that's better than failing. */
1456 while (true)
1458 alloc += sizeof (struct line) - alloc % sizeof (struct line);
1459 buf->buf = malloc (alloc);
1460 if (buf->buf)
1461 break;
1462 alloc /= 2;
1463 if (alloc <= line_bytes + 1)
1464 xalloc_die ();
1467 buf->line_bytes = line_bytes;
1468 buf->alloc = alloc;
1469 buf->used = buf->left = buf->nlines = 0;
1470 buf->eof = false;
1473 /* Return one past the limit of the line array. */
1475 static inline struct line *
1476 buffer_linelim (struct buffer const *buf)
1478 return (struct line *) (buf->buf + buf->alloc);
1481 /* Return a pointer to the first character of the field specified
1482 by KEY in LINE. */
1484 static char *
1485 begfield (const struct line *line, const struct keyfield *key)
1487 char *ptr = line->text, *lim = ptr + line->length - 1;
1488 size_t sword = key->sword;
1489 size_t schar = key->schar;
1491 /* The leading field separator itself is included in a field when -t
1492 is absent. */
1494 if (tab != TAB_DEFAULT)
1495 while (ptr < lim && sword--)
1497 while (ptr < lim && *ptr != tab)
1498 ++ptr;
1499 if (ptr < lim)
1500 ++ptr;
1502 else
1503 while (ptr < lim && sword--)
1505 while (ptr < lim && blanks[to_uchar (*ptr)])
1506 ++ptr;
1507 while (ptr < lim && !blanks[to_uchar (*ptr)])
1508 ++ptr;
1511 /* If we're ignoring leading blanks when computing the Start
1512 of the field, skip past them here. */
1513 if (key->skipsblanks)
1514 while (ptr < lim && blanks[to_uchar (*ptr)])
1515 ++ptr;
1517 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1518 ptr = MIN (lim, ptr + schar);
1520 return ptr;
1523 /* Return the limit of (a pointer to the first character after) the field
1524 in LINE specified by KEY. */
1526 static char *
1527 limfield (const struct line *line, const struct keyfield *key)
1529 char *ptr = line->text, *lim = ptr + line->length - 1;
1530 size_t eword = key->eword, echar = key->echar;
1532 if (echar == 0)
1533 eword++; /* Skip all of end field. */
1535 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1536 whichever comes first. If there are more than EWORD fields, leave
1537 PTR pointing at the beginning of the field having zero-based index,
1538 EWORD. If a delimiter character was specified (via -t), then that
1539 `beginning' is the first character following the delimiting TAB.
1540 Otherwise, leave PTR pointing at the first `blank' character after
1541 the preceding field. */
1542 if (tab != TAB_DEFAULT)
1543 while (ptr < lim && eword--)
1545 while (ptr < lim && *ptr != tab)
1546 ++ptr;
1547 if (ptr < lim && (eword || echar))
1548 ++ptr;
1550 else
1551 while (ptr < lim && eword--)
1553 while (ptr < lim && blanks[to_uchar (*ptr)])
1554 ++ptr;
1555 while (ptr < lim && !blanks[to_uchar (*ptr)])
1556 ++ptr;
1559 #ifdef POSIX_UNSPECIFIED
1560 /* The following block of code makes GNU sort incompatible with
1561 standard Unix sort, so it's ifdef'd out for now.
1562 The POSIX spec isn't clear on how to interpret this.
1563 FIXME: request clarification.
1565 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1566 Date: Thu, 30 May 96 12:20:41 -0400
1567 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1569 [...]I believe I've found another bug in `sort'.
1571 $ cat /tmp/sort.in
1572 a b c 2 d
1573 pq rs 1 t
1574 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1575 a b c 2 d
1576 pq rs 1 t
1577 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1578 pq rs 1 t
1579 a b c 2 d
1581 Unix sort produced the answer I expected: sort on the single character
1582 in column 7. GNU sort produced different results, because it disagrees
1583 on the interpretation of the key-end spec "M.N". Unix sort reads this
1584 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1585 "skip M-1 fields, then either N-1 characters or the rest of the current
1586 field, whichever comes first". This extra clause applies only to
1587 key-ends, not key-starts.
1590 /* Make LIM point to the end of (one byte past) the current field. */
1591 if (tab != TAB_DEFAULT)
1593 char *newlim;
1594 newlim = memchr (ptr, tab, lim - ptr);
1595 if (newlim)
1596 lim = newlim;
1598 else
1600 char *newlim;
1601 newlim = ptr;
1602 while (newlim < lim && blanks[to_uchar (*newlim)])
1603 ++newlim;
1604 while (newlim < lim && !blanks[to_uchar (*newlim)])
1605 ++newlim;
1606 lim = newlim;
1608 #endif
1610 if (echar != 0) /* We need to skip over a portion of the end field. */
1612 /* If we're ignoring leading blanks when computing the End
1613 of the field, skip past them here. */
1614 if (key->skipeblanks)
1615 while (ptr < lim && blanks[to_uchar (*ptr)])
1616 ++ptr;
1618 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1619 ptr = MIN (lim, ptr + echar);
1622 return ptr;
1625 /* Fill BUF reading from FP, moving buf->left bytes from the end
1626 of buf->buf to the beginning first. If EOF is reached and the
1627 file wasn't terminated by a newline, supply one. Set up BUF's line
1628 table too. FILE is the name of the file corresponding to FP.
1629 Return true if some input was read. */
1631 static bool
1632 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1634 struct keyfield const *key = keylist;
1635 char eol = eolchar;
1636 size_t line_bytes = buf->line_bytes;
1637 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1639 if (buf->eof)
1640 return false;
1642 if (buf->used != buf->left)
1644 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1645 buf->used = buf->left;
1646 buf->nlines = 0;
1649 while (true)
1651 char *ptr = buf->buf + buf->used;
1652 struct line *linelim = buffer_linelim (buf);
1653 struct line *line = linelim - buf->nlines;
1654 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1655 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1657 while (line_bytes + 1 < avail)
1659 /* Read as many bytes as possible, but do not read so many
1660 bytes that there might not be enough room for the
1661 corresponding line array. The worst case is when the
1662 rest of the input file consists entirely of newlines,
1663 except that the last byte is not a newline. */
1664 size_t readsize = (avail - 1) / (line_bytes + 1);
1665 size_t bytes_read = fread (ptr, 1, readsize, fp);
1666 char *ptrlim = ptr + bytes_read;
1667 char *p;
1668 avail -= bytes_read;
1670 if (bytes_read != readsize)
1672 if (ferror (fp))
1673 die (_("read failed"), file);
1674 if (feof (fp))
1676 buf->eof = true;
1677 if (buf->buf == ptrlim)
1678 return false;
1679 if (ptrlim[-1] != eol)
1680 *ptrlim++ = eol;
1684 /* Find and record each line in the just-read input. */
1685 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1687 ptr = p + 1;
1688 line--;
1689 line->text = line_start;
1690 line->length = ptr - line_start;
1691 mergesize = MAX (mergesize, line->length);
1692 avail -= line_bytes;
1694 if (key)
1696 /* Precompute the position of the first key for
1697 efficiency. */
1698 line->keylim = (key->eword == SIZE_MAX
1700 : limfield (line, key));
1702 if (key->sword != SIZE_MAX)
1703 line->keybeg = begfield (line, key);
1704 else
1706 if (key->skipsblanks)
1707 while (blanks[to_uchar (*line_start)])
1708 line_start++;
1709 line->keybeg = line_start;
1713 line_start = ptr;
1716 ptr = ptrlim;
1717 if (buf->eof)
1718 break;
1721 buf->used = ptr - buf->buf;
1722 buf->nlines = buffer_linelim (buf) - line;
1723 if (buf->nlines != 0)
1725 buf->left = ptr - line_start;
1726 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1727 return true;
1731 /* The current input line is too long to fit in the buffer.
1732 Double the buffer size and try again, keeping it properly
1733 aligned. */
1734 size_t line_alloc = buf->alloc / sizeof (struct line);
1735 buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1736 buf->alloc = line_alloc * sizeof (struct line);
1741 /* Exit with an error if a mixture of SI and IEC units detected. */
1743 static bool
1744 check_mixed_SI_IEC (char prefix, struct keyfield *key)
1746 int iec_present = prefix == 'i';
1747 if (key)
1749 if (key->iec_present != -1 && iec_present != key->iec_present)
1750 error (SORT_FAILURE, 0, _("both SI and IEC prefixes present on units"));
1751 key->iec_present = iec_present;
1753 return iec_present;
1756 /* Return an integer which represents the order of magnitude of
1757 the unit following the number. NUMBER can contain thousands separators
1758 or a decimal point, but not have preceeding blanks.
1759 Negative numbers return a negative unit order. */
1761 static int
1762 find_unit_order (const char *number, struct keyfield *key, char const **endptr)
1764 static const char orders [UCHAR_LIM] =
1766 #if SOME_DAY_WE_WILL_REQUIRE_C99
1767 ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
1768 ['k']=1,
1769 #else
1770 /* Generate the following table with this command:
1771 perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
1772 foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
1773 |fmt */
1774 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1775 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1776 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3,
1777 0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
1778 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1779 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1780 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1781 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1782 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1783 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1784 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1785 #endif
1788 const unsigned char *p = number;
1790 int sign = 1;
1792 if (*p == '-')
1794 sign = -1;
1795 p++;
1798 /* Scan to end of number.
1799 Decimals or separators not followed by digits stop the scan.
1800 Numbers ending in decimals or separators are thus considered
1801 to be lacking in units.
1802 FIXME: add support for multibyte thousands_sep and decimal_point. */
1804 while (ISDIGIT (*p))
1806 p++;
1808 if (*p == decimal_point && ISDIGIT (*(p + 1)))
1809 p += 2;
1810 else if (*p == thousands_sep && ISDIGIT (*(p + 1)))
1811 p += 2;
1814 int order = orders[*p];
1816 /* For valid units check for MiB vs MB etc. */
1817 if (order)
1819 p++;
1820 p += check_mixed_SI_IEC (*p, key);
1823 if (endptr)
1824 *endptr = p;
1826 return sign * order;
1829 /* Compare numbers ending in units with SI xor IEC prefixes
1830 <none/unknown> < K/k < M < G < T < P < E < Z < Y
1831 Assume that numbers are properly abbreviated.
1832 i.e. input will never have both 6000K and 5M. */
1834 static int
1835 human_numcompare (const char *a, const char *b, struct keyfield *key,
1836 char const **ea)
1838 while (blanks[to_uchar (*a)])
1839 a++;
1840 while (blanks[to_uchar (*b)])
1841 b++;
1843 int order_a = find_unit_order (a, key, ea);
1844 int order_b = find_unit_order (b, key, NULL);
1846 return (order_a > order_b ? 1
1847 : order_a < order_b ? -1
1848 : strnumcmp (a, b, decimal_point, thousands_sep));
1851 /* Compare strings A and B as numbers without explicitly converting them to
1852 machine numbers. Comparatively slow for short strings, but asymptotically
1853 hideously fast. */
1855 static int
1856 numcompare (const char *a, const char *b, char const **ea)
1858 while (blanks[to_uchar (*a)])
1859 a++;
1860 while (blanks[to_uchar (*b)])
1861 b++;
1863 if (debug)
1865 /* Approximate strnumcmp extents with find_unit_order. */
1866 if (find_unit_order (a, NULL, ea))
1868 *ea -= 1; /* ignore the order letter */
1869 *ea -= (**ea == 'i'); /* and IEC prefix */
1873 return strnumcmp (a, b, decimal_point, thousands_sep);
1876 static int
1877 general_numcompare (const char *sa, const char *sb, char const **ea)
1879 /* FIXME: maybe add option to try expensive FP conversion
1880 only if A and B can't be compared more cheaply/accurately. */
1882 #if HAVE_C99_STRTOLD /* provided by c-strtold module. */
1883 # define long_double long double
1884 #else
1885 # define long_double double
1886 # undef strtold
1887 # define strtold strtod
1888 #endif
1890 char *eb;
1891 long_double a = strtold (sa, (char **) ea);
1892 long_double b = strtold (sb, &eb);
1894 /* Put conversion errors at the start of the collating sequence. */
1895 if (sa == *ea)
1896 return sb == eb ? 0 : -1;
1897 if (sb == eb)
1898 return 1;
1900 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1901 conversion errors but before numbers; sort them by internal
1902 bit-pattern, for lack of a more portable alternative. */
1903 return (a < b ? -1
1904 : a > b ? 1
1905 : a == b ? 0
1906 : b == b ? -1
1907 : a == a ? 1
1908 : memcmp ((char *) &a, (char *) &b, sizeof a));
1911 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1912 Return 0 if the name in S is not recognized. */
1914 static int
1915 getmonth (char const *month, size_t len, char const **ea)
1917 size_t lo = 0;
1918 size_t hi = MONTHS_PER_YEAR;
1919 char const *monthlim = month + len;
1921 while (true)
1923 if (month == monthlim)
1924 return 0;
1925 if (!blanks[to_uchar (*month)])
1926 break;
1927 ++month;
1932 size_t ix = (lo + hi) / 2;
1933 char const *m = month;
1934 char const *n = monthtab[ix].name;
1936 for (;; m++, n++)
1938 if (!*n)
1940 if (ea)
1941 *ea = m;
1942 return monthtab[ix].val;
1944 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1946 hi = ix;
1947 break;
1949 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1951 lo = ix + 1;
1952 break;
1956 while (lo < hi);
1958 return 0;
1961 /* A source of random data. */
1962 static struct randread_source *randread_source;
1964 /* Return the Ith randomly-generated state. The caller must invoke
1965 random_state (H) for all H less than I before invoking random_state
1966 (I). */
1968 static struct md5_ctx
1969 random_state (size_t i)
1971 /* An array of states resulting from the random data, and counts of
1972 its used and allocated members. */
1973 static struct md5_ctx *state;
1974 static size_t used;
1975 static size_t allocated;
1977 struct md5_ctx *s = &state[i];
1979 if (used <= i)
1981 unsigned char buf[MD5_DIGEST_SIZE];
1983 used++;
1985 if (allocated <= i)
1987 state = X2NREALLOC (state, &allocated);
1988 s = &state[i];
1991 randread (randread_source, buf, sizeof buf);
1992 md5_init_ctx (s);
1993 md5_process_bytes (buf, sizeof buf, s);
1996 return *s;
1999 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
2000 with length LENGTHB. Return negative if less, zero if equal,
2001 positive if greater. */
2003 static int
2004 cmp_hashes (char const *texta, size_t lena,
2005 char const *textb, size_t lenb)
2007 /* Try random hashes until a pair of hashes disagree. But if the
2008 first pair of random hashes agree, check whether the keys are
2009 identical and if so report no difference. */
2010 int diff;
2011 size_t i;
2012 for (i = 0; ; i++)
2014 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
2015 struct md5_ctx s[2];
2016 s[0] = s[1] = random_state (i);
2017 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
2018 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
2019 diff = memcmp (dig[0], dig[1], sizeof dig[0]);
2020 if (diff != 0)
2021 break;
2022 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
2023 break;
2026 return diff;
2029 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
2030 using one or more random hash functions. */
2032 static int
2033 compare_random (char *restrict texta, size_t lena,
2034 char *restrict textb, size_t lenb)
2036 int diff;
2038 if (! hard_LC_COLLATE)
2039 diff = cmp_hashes (texta, lena, textb, lenb);
2040 else
2042 /* Transform the text into the basis of comparison, so that byte
2043 strings that would otherwise considered to be equal are
2044 considered equal here even if their bytes differ. */
2046 char *buf = NULL;
2047 char stackbuf[4000];
2048 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
2049 bool a_fits = tlena <= sizeof stackbuf;
2050 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
2051 (a_fits ? sizeof stackbuf - tlena : 0),
2052 textb, lenb);
2054 if (a_fits && tlena + tlenb <= sizeof stackbuf)
2055 buf = stackbuf;
2056 else
2058 /* Adding 1 to the buffer size lets xmemxfrm run a bit
2059 faster by avoiding the need for an extra buffer copy. */
2060 buf = xmalloc (tlena + tlenb + 1);
2061 xmemxfrm (buf, tlena + 1, texta, lena);
2062 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
2065 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
2067 if (buf != stackbuf)
2068 free (buf);
2071 return diff;
2074 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
2075 using filevercmp. See lib/filevercmp.h for function description. */
2077 static int
2078 compare_version (char *restrict texta, size_t lena,
2079 char *restrict textb, size_t lenb)
2081 int diff;
2083 /* It is necessary to save the character after the end of the field.
2084 "filevercmp" works with NUL terminated strings. Our blocks of
2085 text are not necessarily terminated with a NUL byte. */
2086 char sv_a = texta[lena];
2087 char sv_b = textb[lenb];
2089 texta[lena] = '\0';
2090 textb[lenb] = '\0';
2092 diff = filevercmp (texta, textb);
2094 texta[lena] = sv_a;
2095 textb[lenb] = sv_b;
2097 return diff;
2100 /* For debug mode, count tabs in the passed string
2101 so we can adjust the widths returned by mbswidth.
2102 FIXME: Should we generally be counting non printable chars? */
2104 static size_t
2105 count_tabs (char const *text, const size_t len)
2107 size_t tabs = 0;
2108 size_t tlen = strnlen (text, len);
2110 while (tlen--)
2112 if (*text++ == '\t')
2113 tabs++;
2116 return tabs;
2119 /* For debug mode, "underline" a key at the
2120 specified offset and screen width. */
2122 static void
2123 mark_key (size_t offset, size_t width)
2125 printf ("%*s", (int) offset, "");
2127 if (!width)
2128 printf (_("^ no match for key\n"));
2129 else
2131 while (width--)
2132 putchar ('_');
2133 putchar ('\n');
2137 /* For debug mode, determine the screen offset and width
2138 to highlight for a key, and then output the highlight. */
2140 static void
2141 debug_key (char const *sline, char const *sfield, char const *efield,
2142 size_t flen, bool skipb)
2144 char const *sa = sfield;
2146 if (skipb) /* This key type implicitly skips leading blanks. */
2148 while (sa < efield && blanks[to_uchar (*sa)])
2150 sa++;
2151 if (flen)
2152 flen--; /* This assumes TABs same width as SPACEs. */
2156 size_t offset = mbsnwidth (sline, sfield - sline, 0) + (sa - sfield);
2157 offset += count_tabs (sline, sfield - sline);
2159 size_t width = mbsnwidth (sa, flen, 0);
2160 width += count_tabs (sa, flen);
2162 mark_key (offset, width);
2165 /* Testing if a key is numeric is done in various places. */
2167 static inline bool
2168 key_numeric (struct keyfield const *key)
2170 return key->numeric || key->general_numeric || key->human_numeric;
2173 /* Return whether sorting options specified for key. */
2175 static bool
2176 default_key_compare (struct keyfield const *key)
2178 return ! (key->ignore
2179 || key->translate
2180 || key->skipsblanks
2181 || key->skipeblanks
2182 || key_numeric (key)
2183 || key->month
2184 || key->version
2185 || key->random
2186 /* || key->reverse */
2190 /* Convert a key to the short options used to specify it. */
2192 static void
2193 key_to_opts (struct keyfield const *key, char *opts)
2195 if (key->skipsblanks || key->skipeblanks)
2196 *opts++ = 'b';/* either disables global -b */
2197 if (key->ignore == nondictionary)
2198 *opts++ = 'd';
2199 if (key->translate)
2200 *opts++ = 'f';
2201 if (key->general_numeric)
2202 *opts++ = 'g';
2203 if (key->human_numeric)
2204 *opts++ = 'h';
2205 if (key->ignore == nonprinting)
2206 *opts++ = 'i';
2207 if (key->month)
2208 *opts++ = 'M';
2209 if (key->numeric)
2210 *opts++ = 'n';
2211 if (key->random)
2212 *opts++ = 'R';
2213 if (key->reverse)
2214 *opts++ = 'r';
2215 if (key->version)
2216 *opts++ = 'V';
2217 *opts = '\0';
2220 /* Output data independent key warnings to stderr. */
2222 static void
2223 key_warnings (struct keyfield const *gkey, bool gkey_only)
2225 struct keyfield const *key;
2226 struct keyfield ugkey = *gkey;
2227 unsigned long keynum = 1;
2229 for (key = keylist; key; key = key->next, keynum++)
2231 if (key->obsolete_used)
2233 /* obsolescent syntax +A.x -B.y is equivalent to:
2234 -k A+1.x+1,B.y (when y = 0)
2235 -k A+1.x+1,B+1.y (when y > 0) */
2236 char obuf[INT_BUFSIZE_BOUND (size_t) * 2 + 4]; /* +# -# */
2237 char nbuf[INT_BUFSIZE_BOUND (size_t) * 2 + 5]; /* -k #,# */
2239 char *po = obuf;
2240 char *pn = nbuf;
2242 size_t sword = key->sword;
2243 size_t eword = key->eword;
2244 if (sword == SIZE_MAX)
2245 sword++;
2247 po += sprintf (po, "+%" PRIuMAX, (uintmax_t) sword);
2248 pn += sprintf (pn, "-k %" PRIuMAX, (uintmax_t) sword + 1);
2249 if (key->eword != SIZE_MAX)
2251 po += sprintf (po, " -%" PRIuMAX, (uintmax_t) eword + 1);
2252 pn += sprintf (pn, ",%" PRIuMAX,
2253 (uintmax_t) eword + 1 + (key->echar == SIZE_MAX));
2255 error (0, 0, _("obsolescent key `%s' used; consider `%s' instead"),
2256 obuf, nbuf);
2259 /* Warn about field specs that will never match. */
2260 if (key->sword != SIZE_MAX && key->eword < key->sword)
2261 error (0, 0, _("key %lu has zero width and will be ignored"), keynum);
2263 /* Warn about significant leading blanks. */
2264 bool implicit_skip = key_numeric (key) || key->month;
2265 bool maybe_space_aligned = !hard_LC_COLLATE && default_key_compare (key)
2266 && !(key->schar || key->echar);
2267 bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */
2268 if (!gkey_only && tab == TAB_DEFAULT && !line_offset
2269 && ((!key->skipsblanks && !(implicit_skip || maybe_space_aligned))
2270 || (!key->skipsblanks && key->schar)
2271 || (!key->skipeblanks && key->echar)))
2272 error (0, 0, _("leading blanks are significant in key %lu; "
2273 "consider also specifying `b'"), keynum);
2275 /* Warn about numeric comparisons spanning fields,
2276 as field delimiters could be interpreted as part
2277 of the number (maybe only in other locales). */
2278 if (!gkey_only && key_numeric (key))
2280 size_t sword = key->sword + 1;
2281 size_t eword = key->eword + 1;
2282 if (!sword)
2283 sword++;
2284 if (sword != eword)
2285 error (0, 0, _("key %lu is numeric and spans multiple fields"),
2286 keynum);
2289 /* Flag global options not copied or specified in any key. */
2290 if (ugkey.ignore && (ugkey.ignore == key->ignore))
2291 ugkey.ignore = NULL;
2292 if (ugkey.translate && (ugkey.translate == key->translate))
2293 ugkey.translate = NULL;
2294 ugkey.skipsblanks &= !key->skipsblanks;
2295 ugkey.skipeblanks &= !key->skipeblanks;
2296 ugkey.month &= !key->month;
2297 ugkey.numeric &= !key->numeric;
2298 ugkey.general_numeric &= !key->general_numeric;
2299 ugkey.human_numeric &= !key->human_numeric;
2300 ugkey.random &= !key->random;
2301 ugkey.version &= !key->version;
2302 ugkey.reverse &= !key->reverse;
2305 /* Warn about ignored global options flagged above.
2306 Note if gkey is the only one in the list, all flags are cleared. */
2307 if (!default_key_compare (&ugkey)
2308 || (ugkey.reverse && (stable || unique) && keylist))
2310 bool ugkey_reverse = ugkey.reverse;
2311 if (!(stable || unique))
2312 ugkey.reverse = false;
2313 /* The following is too big, but guaranteed to be "big enough". */
2314 char opts[sizeof short_options];
2315 key_to_opts (&ugkey, opts);
2316 error (0, 0,
2317 ngettext ("option `-%s' is ignored",
2318 "options `-%s' are ignored",
2319 select_plural (strlen (opts))), opts);
2320 ugkey.reverse = ugkey_reverse;
2322 if (ugkey.reverse && !(stable || unique) && keylist)
2323 error (0, 0, _("option `-r' only applies to last-resort comparison"));
2326 /* Compare two lines A and B trying every key in sequence until there
2327 are no more keys or a difference is found. */
2329 static int
2330 keycompare (const struct line *a, const struct line *b, bool show_debug)
2332 struct keyfield *key = keylist;
2334 /* For the first iteration only, the key positions have been
2335 precomputed for us. */
2336 char *texta = a->keybeg;
2337 char *textb = b->keybeg;
2338 char *lima = a->keylim;
2339 char *limb = b->keylim;
2341 int diff;
2343 while (true)
2345 char const *translate = key->translate;
2346 bool const *ignore = key->ignore;
2347 bool skipb = false; /* Whether key type auto skips leading blanks. */
2349 /* Treat field ends before field starts as empty fields. */
2350 lima = MAX (texta, lima);
2351 limb = MAX (textb, limb);
2353 /* Find the lengths. */
2354 size_t lena = lima - texta;
2355 size_t lenb = limb - textb;
2357 /* Actually compare the fields. */
2359 if (key->random)
2360 diff = compare_random (texta, lena, textb, lenb);
2361 else if (key_numeric (key))
2363 char savea = *lima, saveb = *limb;
2364 char const* ea = lima;
2366 *lima = *limb = '\0';
2367 diff = (key->numeric ? numcompare (texta, textb, &ea)
2368 : key->general_numeric ? general_numcompare (texta, textb,
2369 &ea)
2370 : human_numcompare (texta, textb, key, &ea));
2371 if (show_debug)
2373 lena = ea - texta;
2374 skipb = true;
2376 *lima = savea, *limb = saveb;
2378 else if (key->version)
2379 diff = compare_version (texta, lena, textb, lenb);
2380 else if (key->month)
2382 char const *ea = lima;
2384 int amon = getmonth (texta, lena, &ea);
2385 diff = amon - getmonth (textb, lenb, NULL);
2387 if (show_debug)
2389 lena = amon ? ea - texta : 0;
2390 skipb = true;
2393 /* Sorting like this may become slow, so in a simple locale the user
2394 can select a faster sort that is similar to ascii sort. */
2395 else if (hard_LC_COLLATE)
2397 /* FIXME: for debug, account for skipped chars, while handling mb chars.
2398 Generally perhaps xmemfrm could be used to determine chars that are
2399 excluded from the collating order? */
2400 if (ignore || translate)
2402 char buf[4000];
2403 size_t size = lena + 1 + lenb + 1;
2404 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
2405 char *copy_b = copy_a + lena + 1;
2406 size_t new_len_a, new_len_b, i;
2408 /* Ignore and/or translate chars before comparing. */
2409 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
2411 if (i < lena)
2413 copy_a[new_len_a] = (translate
2414 ? translate[to_uchar (texta[i])]
2415 : texta[i]);
2416 if (!ignore || !ignore[to_uchar (texta[i])])
2417 ++new_len_a;
2419 if (i < lenb)
2421 copy_b[new_len_b] = (translate
2422 ? translate[to_uchar (textb[i])]
2423 : textb [i]);
2424 if (!ignore || !ignore[to_uchar (textb[i])])
2425 ++new_len_b;
2429 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
2431 if (sizeof buf < size)
2432 free (copy_a);
2434 else if (lena == 0)
2435 diff = - NONZERO (lenb);
2436 else if (lenb == 0)
2437 goto greater;
2438 else
2439 diff = xmemcoll (texta, lena, textb, lenb);
2441 else if (ignore)
2443 char *savea = texta;
2445 #define CMP_WITH_IGNORE(A, B) \
2446 do \
2448 while (true) \
2450 while (texta < lima && ignore[to_uchar (*texta)]) \
2451 ++texta; \
2452 while (textb < limb && ignore[to_uchar (*textb)]) \
2453 ++textb; \
2454 if (! (texta < lima && textb < limb)) \
2455 break; \
2456 diff = to_uchar (A) - to_uchar (B); \
2457 if (diff) \
2458 goto not_equal; \
2459 ++texta; \
2460 ++textb; \
2463 diff = (texta < lima) - (textb < limb); \
2465 while (0)
2467 if (translate)
2468 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
2469 translate[to_uchar (*textb)]);
2470 else
2471 CMP_WITH_IGNORE (*texta, *textb);
2473 /* We only need to restore this for debug_key
2474 in which case the keys being compared are equal. */
2475 texta = savea;
2477 else if (lena == 0)
2478 diff = - NONZERO (lenb);
2479 else if (lenb == 0)
2480 goto greater;
2481 else
2483 if (translate)
2485 char *savea = texta;
2487 while (texta < lima && textb < limb)
2489 diff = (to_uchar (translate[to_uchar (*texta++)])
2490 - to_uchar (translate[to_uchar (*textb++)]));
2491 if (diff)
2492 goto not_equal;
2495 /* We only need to restore this for debug_key
2496 in which case the keys being compared are equal. */
2497 texta = savea;
2499 else
2501 diff = memcmp (texta, textb, MIN (lena, lenb));
2502 if (diff)
2503 goto not_equal;
2505 diff = lena < lenb ? -1 : lena != lenb;
2508 if (diff)
2509 goto not_equal;
2511 if (show_debug)
2512 debug_key (a->text, texta, lima, lena, skipb);
2514 key = key->next;
2515 if (! key)
2516 break;
2518 /* Find the beginning and limit of the next field. */
2519 if (key->eword != SIZE_MAX)
2520 lima = limfield (a, key), limb = limfield (b, key);
2521 else
2522 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
2524 if (key->sword != SIZE_MAX)
2525 texta = begfield (a, key), textb = begfield (b, key);
2526 else
2528 texta = a->text, textb = b->text;
2529 if (key->skipsblanks)
2531 while (texta < lima && blanks[to_uchar (*texta)])
2532 ++texta;
2533 while (textb < limb && blanks[to_uchar (*textb)])
2534 ++textb;
2539 return 0;
2541 greater:
2542 diff = 1;
2543 not_equal:
2544 return key->reverse ? -diff : diff;
2547 /* Compare two lines A and B, returning negative, zero, or positive
2548 depending on whether A compares less than, equal to, or greater than B. */
2550 static int
2551 compare (const struct line *a, const struct line *b, bool show_debug)
2553 int diff;
2554 size_t alen, blen;
2556 /* First try to compare on the specified keys (if any).
2557 The only two cases with no key at all are unadorned sort,
2558 and unadorned sort -r. */
2559 if (keylist)
2561 diff = keycompare (a, b, show_debug);
2562 if (diff || unique || stable)
2563 return diff;
2566 /* If the keys all compare equal (or no keys were specified)
2567 fall through to the default comparison. */
2568 alen = a->length - 1, blen = b->length - 1;
2570 if (show_debug)
2571 debug_key (a->text, a->text, a->text + alen, alen, false);
2573 if (alen == 0)
2574 diff = - NONZERO (blen);
2575 else if (blen == 0)
2576 diff = 1;
2577 else if (hard_LC_COLLATE)
2578 diff = xmemcoll (a->text, alen, b->text, blen);
2579 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2580 diff = alen < blen ? -1 : alen != blen;
2582 return reverse ? -diff : diff;
2585 static void
2586 write_bytes (struct line const *line, FILE *fp, char const *output_file)
2588 char const *buf = line->text;
2589 size_t n_bytes = line->length;
2591 /* Convert TABs to '>' and \0 to \n when -z specified. */
2592 if (debug && fp == stdout)
2594 char const *ebuf = buf + n_bytes;
2595 char const *c = buf;
2597 while (c < ebuf)
2599 char wc = *c++;
2600 if (wc == '\t')
2601 wc = '>';
2602 else if (wc == 0 && eolchar == 0)
2603 wc = '\n';
2604 if (fputc (wc, fp) == EOF)
2605 die (_("write failed"), output_file);
2608 compare (line, line, true);
2610 else
2612 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
2613 die (_("write failed"), output_file);
2617 /* Check that the lines read from FILE_NAME come in order. Return
2618 true if they are in order. If CHECKONLY == 'c', also print a
2619 diagnostic (FILE_NAME, line number, contents of line) to stderr if
2620 they are not in order. */
2622 static bool
2623 check (char const *file_name, char checkonly)
2625 FILE *fp = xfopen (file_name, "r");
2626 struct buffer buf; /* Input buffer. */
2627 struct line temp; /* Copy of previous line. */
2628 size_t alloc = 0;
2629 uintmax_t line_number = 0;
2630 struct keyfield const *key = keylist;
2631 bool nonunique = ! unique;
2632 bool ordered = true;
2634 initbuf (&buf, sizeof (struct line),
2635 MAX (merge_buffer_size, sort_size));
2636 temp.text = NULL;
2638 while (fillbuf (&buf, fp, file_name))
2640 struct line const *line = buffer_linelim (&buf);
2641 struct line const *linebase = line - buf.nlines;
2643 /* Make sure the line saved from the old buffer contents is
2644 less than or equal to the first line of the new buffer. */
2645 if (alloc && nonunique <= compare (&temp, line - 1, false))
2647 found_disorder:
2649 if (checkonly == 'c')
2651 struct line const *disorder_line = line - 1;
2652 uintmax_t disorder_line_number =
2653 buffer_linelim (&buf) - disorder_line + line_number;
2654 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2655 fprintf (stderr, _("%s: %s:%s: disorder: "),
2656 program_name, file_name,
2657 umaxtostr (disorder_line_number, hr_buf));
2658 if (debug)
2659 fputc ('\n', stderr);
2660 write_bytes (disorder_line, debug ? stdout : stderr,
2661 debug ? _("standard out") : _("standard error"));
2664 ordered = false;
2665 break;
2669 /* Compare each line in the buffer with its successor. */
2670 while (linebase < --line)
2671 if (nonunique <= compare (line, line - 1, false))
2672 goto found_disorder;
2674 line_number += buf.nlines;
2676 /* Save the last line of the buffer. */
2677 if (alloc < line->length)
2681 alloc *= 2;
2682 if (! alloc)
2684 alloc = line->length;
2685 break;
2688 while (alloc < line->length);
2690 temp.text = xrealloc (temp.text, alloc);
2692 memcpy (temp.text, line->text, line->length);
2693 temp.length = line->length;
2694 if (key)
2696 temp.keybeg = temp.text + (line->keybeg - line->text);
2697 temp.keylim = temp.text + (line->keylim - line->text);
2701 xfclose (fp, file_name);
2702 free (buf.buf);
2703 free (temp.text);
2704 return ordered;
2707 /* Open FILES (there are NFILES of them) and store the resulting array
2708 of stream pointers into (*PFPS). Allocate the array. Return the
2709 number of successfully opened files, setting errno if this value is
2710 less than NFILES. */
2712 static size_t
2713 open_input_files (struct sortfile *files, size_t nfiles, FILE ***pfps)
2715 FILE **fps = *pfps = xnmalloc (nfiles, sizeof *fps);
2716 int i;
2718 /* Open as many input files as we can. */
2719 for (i = 0; i < nfiles; i++)
2721 fps[i] = (files[i].pid
2722 ? open_temp (files[i].name, files[i].pid)
2723 : stream_open (files[i].name, "r"));
2724 if (!fps[i])
2725 break;
2728 return i;
2731 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2732 files (all of which are at the start of the FILES array), and
2733 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2734 FPS is the vector of open stream corresponding to the files.
2735 Close input and output streams before returning.
2736 OUTPUT_FILE gives the name of the output file. If it is NULL,
2737 the output file is standard output. */
2739 static void
2740 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2741 FILE *ofp, char const *output_file, FILE **fps)
2743 struct buffer *buffer = xnmalloc (nfiles, sizeof *buffer);
2744 /* Input buffers for each file. */
2745 struct line saved; /* Saved line storage for unique check. */
2746 struct line const *savedline = NULL;
2747 /* &saved if there is a saved line. */
2748 size_t savealloc = 0; /* Size allocated for the saved line. */
2749 struct line const **cur = xnmalloc (nfiles, sizeof *cur);
2750 /* Current line in each line table. */
2751 struct line const **base = xnmalloc (nfiles, sizeof *base);
2752 /* Base of each line table. */
2753 size_t *ord = xnmalloc (nfiles, sizeof *ord);
2754 /* Table representing a permutation of fps,
2755 such that cur[ord[0]] is the smallest line
2756 and will be next output. */
2757 size_t i;
2758 size_t j;
2759 size_t t;
2760 struct keyfield const *key = keylist;
2761 saved.text = NULL;
2763 /* Read initial lines from each input file. */
2764 for (i = 0; i < nfiles; )
2766 initbuf (&buffer[i], sizeof (struct line),
2767 MAX (merge_buffer_size, sort_size / nfiles));
2768 if (fillbuf (&buffer[i], fps[i], files[i].name))
2770 struct line const *linelim = buffer_linelim (&buffer[i]);
2771 cur[i] = linelim - 1;
2772 base[i] = linelim - buffer[i].nlines;
2773 i++;
2775 else
2777 /* fps[i] is empty; eliminate it from future consideration. */
2778 xfclose (fps[i], files[i].name);
2779 if (i < ntemps)
2781 ntemps--;
2782 zaptemp (files[i].name);
2784 free (buffer[i].buf);
2785 --nfiles;
2786 for (j = i; j < nfiles; ++j)
2788 files[j] = files[j + 1];
2789 fps[j] = fps[j + 1];
2794 /* Set up the ord table according to comparisons among input lines.
2795 Since this only reorders two items if one is strictly greater than
2796 the other, it is stable. */
2797 for (i = 0; i < nfiles; ++i)
2798 ord[i] = i;
2799 for (i = 1; i < nfiles; ++i)
2800 if (0 < compare (cur[ord[i - 1]], cur[ord[i]], false))
2801 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2803 /* Repeatedly output the smallest line until no input remains. */
2804 while (nfiles)
2806 struct line const *smallest = cur[ord[0]];
2808 /* If uniquified output is turned on, output only the first of
2809 an identical series of lines. */
2810 if (unique)
2812 if (savedline && compare (savedline, smallest, false))
2814 savedline = NULL;
2815 write_bytes (&saved, ofp, output_file);
2817 if (!savedline)
2819 savedline = &saved;
2820 if (savealloc < smallest->length)
2823 if (! savealloc)
2825 savealloc = smallest->length;
2826 break;
2828 while ((savealloc *= 2) < smallest->length);
2830 saved.text = xrealloc (saved.text, savealloc);
2832 saved.length = smallest->length;
2833 memcpy (saved.text, smallest->text, saved.length);
2834 if (key)
2836 saved.keybeg =
2837 saved.text + (smallest->keybeg - smallest->text);
2838 saved.keylim =
2839 saved.text + (smallest->keylim - smallest->text);
2843 else
2844 write_bytes (smallest, ofp, output_file);
2846 /* Check if we need to read more lines into core. */
2847 if (base[ord[0]] < smallest)
2848 cur[ord[0]] = smallest - 1;
2849 else
2851 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2853 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2854 cur[ord[0]] = linelim - 1;
2855 base[ord[0]] = linelim - buffer[ord[0]].nlines;
2857 else
2859 /* We reached EOF on fps[ord[0]]. */
2860 for (i = 1; i < nfiles; ++i)
2861 if (ord[i] > ord[0])
2862 --ord[i];
2863 --nfiles;
2864 xfclose (fps[ord[0]], files[ord[0]].name);
2865 if (ord[0] < ntemps)
2867 ntemps--;
2868 zaptemp (files[ord[0]].name);
2870 free (buffer[ord[0]].buf);
2871 for (i = ord[0]; i < nfiles; ++i)
2873 fps[i] = fps[i + 1];
2874 files[i] = files[i + 1];
2875 buffer[i] = buffer[i + 1];
2876 cur[i] = cur[i + 1];
2877 base[i] = base[i + 1];
2879 for (i = 0; i < nfiles; ++i)
2880 ord[i] = ord[i + 1];
2881 continue;
2885 /* The new line just read in may be larger than other lines
2886 already in main memory; push it back in the queue until we
2887 encounter a line larger than it. Optimize for the common
2888 case where the new line is smallest. */
2890 size_t lo = 1;
2891 size_t hi = nfiles;
2892 size_t probe = lo;
2893 size_t ord0 = ord[0];
2894 size_t count_of_smaller_lines;
2896 while (lo < hi)
2898 int cmp = compare (cur[ord0], cur[ord[probe]], false);
2899 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2900 hi = probe;
2901 else
2902 lo = probe + 1;
2903 probe = (lo + hi) / 2;
2906 count_of_smaller_lines = lo - 1;
2907 for (j = 0; j < count_of_smaller_lines; j++)
2908 ord[j] = ord[j + 1];
2909 ord[count_of_smaller_lines] = ord0;
2912 /* Free up some resources every once in a while. */
2913 if (MAX_PROCS_BEFORE_REAP < nprocs)
2914 reap_some ();
2917 if (unique && savedline)
2919 write_bytes (&saved, ofp, output_file);
2920 free (saved.text);
2923 xfclose (ofp, output_file);
2924 free (fps);
2925 free (buffer);
2926 free (ord);
2927 free (base);
2928 free (cur);
2931 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2932 files (all of which are at the start of the FILES array), and
2933 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2934 Close input and output files before returning.
2935 OUTPUT_FILE gives the name of the output file.
2937 Return the number of files successfully merged. This number can be
2938 less than NFILES if we ran low on file descriptors, but in this
2939 case it is never less than 2. */
2941 static size_t
2942 mergefiles (struct sortfile *files, size_t ntemps, size_t nfiles,
2943 FILE *ofp, char const *output_file)
2945 FILE **fps;
2946 size_t nopened = open_input_files (files, nfiles, &fps);
2947 if (nopened < nfiles && nopened < 2)
2948 die (_("open failed"), files[nopened].name);
2949 mergefps (files, ntemps, nopened, ofp, output_file, fps);
2950 return nopened;
2953 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2954 and HI (with NHI members). T, LO, and HI point just past their
2955 respective arrays, and the arrays are in reverse order. NLO and
2956 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2958 static inline void
2959 mergelines (struct line *t,
2960 struct line const *lo, size_t nlo,
2961 struct line const *hi, size_t nhi)
2963 while (true)
2964 if (compare (lo - 1, hi - 1, false) <= 0)
2966 *--t = *--lo;
2967 if (! --nlo)
2969 /* HI - NHI equalled T - (NLO + NHI) when this function
2970 began. Therefore HI must equal T now, and there is no
2971 need to copy from HI to T. */
2972 return;
2975 else
2977 *--t = *--hi;
2978 if (! --nhi)
2981 *--t = *--lo;
2982 while (--nlo);
2984 return;
2989 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2990 NLINES must be at least 2.
2991 The input and output arrays are in reverse order, and LINES and
2992 TEMP point just past the end of their respective arrays.
2994 Use a recursive divide-and-conquer algorithm, in the style
2995 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2996 the optimization suggested by exercise 5.2.4-10; this requires room
2997 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2998 writes that this memory optimization was originally published by
2999 D. A. Bell, Comp J. 1 (1958), 75. */
3001 static void
3002 sortlines (struct line *lines, size_t nlines, struct line *temp)
3004 if (nlines == 2)
3006 if (0 < compare (&lines[-1], &lines[-2], false))
3008 struct line tmp = lines[-1];
3009 lines[-1] = lines[-2];
3010 lines[-2] = tmp;
3013 else
3015 size_t nlo = nlines / 2;
3016 size_t nhi = nlines - nlo;
3017 struct line *lo = lines;
3018 struct line *hi = lines - nlo;
3019 struct line *sorted_lo = temp;
3021 sortlines (hi, nhi, temp);
3022 if (1 < nlo)
3023 sortlines_temp (lo, nlo, sorted_lo);
3024 else
3025 sorted_lo[-1] = lo[-1];
3027 mergelines (lines, sorted_lo, nlo, hi, nhi);
3031 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
3032 rather than sorting in place. */
3034 static void
3035 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
3037 if (nlines == 2)
3039 /* Declare `swap' as int, not bool, to work around a bug
3040 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
3041 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
3042 int swap = (0 < compare (&lines[-1], &lines[-2], false));
3043 temp[-1] = lines[-1 - swap];
3044 temp[-2] = lines[-2 + swap];
3046 else
3048 size_t nlo = nlines / 2;
3049 size_t nhi = nlines - nlo;
3050 struct line *lo = lines;
3051 struct line *hi = lines - nlo;
3052 struct line *sorted_hi = temp - nlo;
3054 sortlines_temp (hi, nhi, sorted_hi);
3055 if (1 < nlo)
3056 sortlines (lo, nlo, temp);
3058 mergelines (temp, lo, nlo, sorted_hi, nhi);
3062 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
3063 the same as OUTFILE. If found, merge the found instances (and perhaps
3064 some other files) into a temporary file so that it can in turn be
3065 merged into OUTFILE without destroying OUTFILE before it is completely
3066 read. Return the new value of NFILES, which differs from the old if
3067 some merging occurred.
3069 This test ensures that an otherwise-erroneous use like
3070 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
3071 It's not clear that POSIX requires this nicety.
3072 Detect common error cases, but don't try to catch obscure cases like
3073 "cat ... FILE ... | sort -m -o FILE"
3074 where traditional "sort" doesn't copy the input and where
3075 people should know that they're getting into trouble anyway.
3076 Catching these obscure cases would slow down performance in
3077 common cases. */
3079 static size_t
3080 avoid_trashing_input (struct sortfile *files, size_t ntemps,
3081 size_t nfiles, char const *outfile)
3083 size_t i;
3084 bool got_outstat = false;
3085 struct stat outstat;
3087 for (i = ntemps; i < nfiles; i++)
3089 bool is_stdin = STREQ (files[i].name, "-");
3090 bool same;
3091 struct stat instat;
3093 if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
3094 same = true;
3095 else
3097 if (! got_outstat)
3099 if ((outfile
3100 ? stat (outfile, &outstat)
3101 : fstat (STDOUT_FILENO, &outstat))
3102 != 0)
3103 break;
3104 got_outstat = true;
3107 same = (((is_stdin
3108 ? fstat (STDIN_FILENO, &instat)
3109 : stat (files[i].name, &instat))
3110 == 0)
3111 && SAME_INODE (instat, outstat));
3114 if (same)
3116 FILE *tftp;
3117 pid_t pid;
3118 char *temp = create_temp (&tftp, &pid);
3119 size_t num_merged = 0;
3122 num_merged += mergefiles (&files[i], 0, nfiles - i, tftp, temp);
3123 files[i].name = temp;
3124 files[i].pid = pid;
3126 if (i + num_merged < nfiles)
3127 memmove (&files[i + 1], &files[i + num_merged],
3128 num_merged * sizeof *files);
3129 ntemps += 1;
3130 nfiles -= num_merged - 1;;
3131 i += num_merged;
3133 while (i < nfiles);
3137 return nfiles;
3140 /* Merge the input FILES. NTEMPS is the number of files at the
3141 start of FILES that are temporary; it is zero at the top level.
3142 NFILES is the total number of files. Put the output in
3143 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
3145 static void
3146 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
3147 char const *output_file)
3149 while (nmerge < nfiles)
3151 /* Number of input files processed so far. */
3152 size_t in;
3154 /* Number of output files generated so far. */
3155 size_t out;
3157 /* nfiles % NMERGE; this counts input files that are left over
3158 after all full-sized merges have been done. */
3159 size_t remainder;
3161 /* Number of easily-available slots at the next loop iteration. */
3162 size_t cheap_slots;
3164 /* Do as many NMERGE-size merges as possible. In the case that
3165 nmerge is bogus, increment by the maximum number of file
3166 descriptors allowed. */
3167 for (out = in = 0; nmerge <= nfiles - in; out++)
3169 FILE *tfp;
3170 pid_t pid;
3171 char *temp = create_temp (&tfp, &pid);
3172 size_t num_merged = mergefiles (&files[in], MIN (ntemps, nmerge),
3173 nmerge, tfp, temp);
3174 ntemps -= MIN (ntemps, num_merged);
3175 files[out].name = temp;
3176 files[out].pid = pid;
3177 in += num_merged;
3180 remainder = nfiles - in;
3181 cheap_slots = nmerge - out % nmerge;
3183 if (cheap_slots < remainder)
3185 /* So many files remain that they can't all be put into the last
3186 NMERGE-sized output window. Do one more merge. Merge as few
3187 files as possible, to avoid needless I/O. */
3188 size_t nshortmerge = remainder - cheap_slots + 1;
3189 FILE *tfp;
3190 pid_t pid;
3191 char *temp = create_temp (&tfp, &pid);
3192 size_t num_merged = mergefiles (&files[in], MIN (ntemps, nshortmerge),
3193 nshortmerge, tfp, temp);
3194 ntemps -= MIN (ntemps, num_merged);
3195 files[out].name = temp;
3196 files[out++].pid = pid;
3197 in += num_merged;
3200 /* Put the remaining input files into the last NMERGE-sized output
3201 window, so they will be merged in the next pass. */
3202 memmove (&files[out], &files[in], (nfiles - in) * sizeof *files);
3203 ntemps += out;
3204 nfiles -= in - out;
3207 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
3209 /* We aren't guaranteed that this final mergefiles will work, therefore we
3210 try to merge into the output, and then merge as much as we can into a
3211 temp file if we can't. Repeat. */
3213 while (true)
3215 /* Merge directly into the output file if possible. */
3216 FILE **fps;
3217 size_t nopened = open_input_files (files, nfiles, &fps);
3219 if (nopened == nfiles)
3221 FILE *ofp = stream_open (output_file, "w");
3222 if (ofp)
3224 mergefps (files, ntemps, nfiles, ofp, output_file, fps);
3225 break;
3227 if (errno != EMFILE || nopened <= 2)
3228 die (_("open failed"), output_file);
3230 else if (nopened <= 2)
3231 die (_("open failed"), files[nopened].name);
3233 /* We ran out of file descriptors. Close one of the input
3234 files, to gain a file descriptor. Then create a temporary
3235 file with our spare file descriptor. Retry if that failed
3236 (e.g., some other process could open a file between the time
3237 we closed and tried to create). */
3238 FILE *tfp;
3239 pid_t pid;
3240 char *temp;
3243 nopened--;
3244 xfclose (fps[nopened], files[nopened].name);
3245 temp = maybe_create_temp (&tfp, &pid, ! (nopened <= 2));
3247 while (!temp);
3249 /* Merge into the newly allocated temporary. */
3250 mergefps (&files[0], MIN (ntemps, nopened), nopened, tfp, temp, fps);
3251 ntemps -= MIN (ntemps, nopened);
3252 files[0].name = temp;
3253 files[0].pid = pid;
3255 memmove (&files[1], &files[nopened], (nfiles - nopened) * sizeof *files);
3256 ntemps++;
3257 nfiles -= nopened - 1;
3261 /* Sort NFILES FILES onto OUTPUT_FILE. */
3263 static void
3264 sort (char * const *files, size_t nfiles, char const *output_file)
3266 struct buffer buf;
3267 size_t ntemps = 0;
3268 bool output_file_created = false;
3270 buf.alloc = 0;
3272 while (nfiles)
3274 char const *temp_output;
3275 char const *file = *files;
3276 FILE *fp = xfopen (file, "r");
3277 FILE *tfp;
3278 size_t bytes_per_line = (2 * sizeof (struct line)
3279 - sizeof (struct line) / 2);
3281 if (! buf.alloc)
3282 initbuf (&buf, bytes_per_line,
3283 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
3284 buf.eof = false;
3285 files++;
3286 nfiles--;
3288 while (fillbuf (&buf, fp, file))
3290 struct line *line;
3291 struct line *linebase;
3293 if (buf.eof && nfiles
3294 && (bytes_per_line + 1
3295 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
3297 /* End of file, but there is more input and buffer room.
3298 Concatenate the next input file; this is faster in
3299 the usual case. */
3300 buf.left = buf.used;
3301 break;
3304 line = buffer_linelim (&buf);
3305 linebase = line - buf.nlines;
3306 if (1 < buf.nlines)
3307 sortlines (line, buf.nlines, linebase);
3308 if (buf.eof && !nfiles && !ntemps && !buf.left)
3310 xfclose (fp, file);
3311 tfp = xfopen (output_file, "w");
3312 temp_output = output_file;
3313 output_file_created = true;
3315 else
3317 ++ntemps;
3318 temp_output = create_temp (&tfp, NULL);
3323 line--;
3324 write_bytes (line, tfp, temp_output);
3325 if (unique)
3326 while (linebase < line && compare (line, line - 1, false) == 0)
3327 line--;
3329 while (linebase < line);
3331 xfclose (tfp, temp_output);
3333 /* Free up some resources every once in a while. */
3334 if (MAX_PROCS_BEFORE_REAP < nprocs)
3335 reap_some ();
3337 if (output_file_created)
3338 goto finish;
3340 xfclose (fp, file);
3343 finish:
3344 free (buf.buf);
3346 if (! output_file_created)
3348 size_t i;
3349 struct tempnode *node = temphead;
3350 struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
3351 for (i = 0; node; i++)
3353 tempfiles[i].name = node->name;
3354 tempfiles[i].pid = node->pid;
3355 node = node->next;
3357 merge (tempfiles, ntemps, ntemps, output_file);
3358 free (tempfiles);
3362 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
3364 static void
3365 insertkey (struct keyfield *key_arg)
3367 struct keyfield **p;
3368 struct keyfield *key = xmemdup (key_arg, sizeof *key);
3370 for (p = &keylist; *p; p = &(*p)->next)
3371 continue;
3372 *p = key;
3373 key->next = NULL;
3376 /* Report a bad field specification SPEC, with extra info MSGID. */
3378 static void badfieldspec (char const *, char const *)
3379 ATTRIBUTE_NORETURN;
3380 static void
3381 badfieldspec (char const *spec, char const *msgid)
3383 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
3384 _(msgid), quote (spec));
3385 abort ();
3388 /* Report incompatible options. */
3390 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
3391 static void
3392 incompatible_options (char const *opts)
3394 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
3395 abort ();
3398 /* Check compatibility of ordering options. */
3400 static void
3401 check_ordering_compatibility (void)
3403 struct keyfield *key;
3405 for (key = keylist; key; key = key->next)
3406 if ((1 < (key->random + key->numeric + key->general_numeric + key->month
3407 + key->version + !!key->ignore + key->human_numeric))
3408 || (key->random && key->translate))
3410 /* The following is too big, but guaranteed to be "big enough". */
3411 char opts[sizeof short_options];
3412 /* Clear flags we're not interested in. */
3413 key->skipsblanks = key->skipeblanks = key->reverse = false;
3414 key_to_opts (key, opts);
3415 incompatible_options (opts);
3419 /* Parse the leading integer in STRING and store the resulting value
3420 (which must fit into size_t) into *VAL. Return the address of the
3421 suffix after the integer. If the value is too large, silently
3422 substitute SIZE_MAX. If MSGID is NULL, return NULL after
3423 failure; otherwise, report MSGID and exit on failure. */
3425 static char const *
3426 parse_field_count (char const *string, size_t *val, char const *msgid)
3428 char *suffix;
3429 uintmax_t n;
3431 switch (xstrtoumax (string, &suffix, 10, &n, ""))
3433 case LONGINT_OK:
3434 case LONGINT_INVALID_SUFFIX_CHAR:
3435 *val = n;
3436 if (*val == n)
3437 break;
3438 /* Fall through. */
3439 case LONGINT_OVERFLOW:
3440 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
3441 *val = SIZE_MAX;
3442 break;
3444 case LONGINT_INVALID:
3445 if (msgid)
3446 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
3447 _(msgid), quote (string));
3448 return NULL;
3451 return suffix;
3454 /* Handle interrupts and hangups. */
3456 static void
3457 sighandler (int sig)
3459 if (! SA_NOCLDSTOP)
3460 signal (sig, SIG_IGN);
3462 cleanup ();
3464 signal (sig, SIG_DFL);
3465 raise (sig);
3468 /* Set the ordering options for KEY specified in S.
3469 Return the address of the first character in S that
3470 is not a valid ordering option.
3471 BLANKTYPE is the kind of blanks that 'b' should skip. */
3473 static char *
3474 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
3476 while (*s)
3478 switch (*s)
3480 case 'b':
3481 if (blanktype == bl_start || blanktype == bl_both)
3482 key->skipsblanks = true;
3483 if (blanktype == bl_end || blanktype == bl_both)
3484 key->skipeblanks = true;
3485 break;
3486 case 'd':
3487 key->ignore = nondictionary;
3488 break;
3489 case 'f':
3490 key->translate = fold_toupper;
3491 break;
3492 case 'g':
3493 key->general_numeric = true;
3494 break;
3495 case 'h':
3496 key->human_numeric = true;
3497 break;
3498 case 'i':
3499 /* Option order should not matter, so don't let -i override
3500 -d. -d implies -i, but -i does not imply -d. */
3501 if (! key->ignore)
3502 key->ignore = nonprinting;
3503 break;
3504 case 'M':
3505 key->month = true;
3506 break;
3507 case 'n':
3508 key->numeric = true;
3509 break;
3510 case 'R':
3511 key->random = true;
3512 break;
3513 case 'r':
3514 key->reverse = true;
3515 break;
3516 case 'V':
3517 key->version = true;
3518 break;
3519 default:
3520 return (char *) s;
3522 ++s;
3524 return (char *) s;
3527 static struct keyfield *
3528 key_init (struct keyfield *key)
3530 memset (key, 0, sizeof *key);
3531 key->eword = SIZE_MAX;
3532 key->iec_present = -1;
3533 return key;
3537 main (int argc, char **argv)
3539 struct keyfield *key;
3540 struct keyfield key_buf;
3541 struct keyfield gkey;
3542 bool gkey_only = false;
3543 char const *s;
3544 int c = 0;
3545 char checkonly = 0;
3546 bool mergeonly = false;
3547 char *random_source = NULL;
3548 bool need_random = false;
3549 size_t nfiles = 0;
3550 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
3551 bool obsolete_usage = (posix2_version () < 200112);
3552 char **files;
3553 char *files_from = NULL;
3554 struct Tokens tok;
3555 char const *outfile = NULL;
3557 initialize_main (&argc, &argv);
3558 set_program_name (argv[0]);
3559 setlocale (LC_ALL, "");
3560 bindtextdomain (PACKAGE, LOCALEDIR);
3561 textdomain (PACKAGE);
3563 initialize_exit_failure (SORT_FAILURE);
3565 hard_LC_COLLATE = hard_locale (LC_COLLATE);
3566 #if HAVE_NL_LANGINFO
3567 hard_LC_TIME = hard_locale (LC_TIME);
3568 #endif
3570 /* Get locale's representation of the decimal point. */
3572 struct lconv const *locale = localeconv ();
3574 /* If the locale doesn't define a decimal point, or if the decimal
3575 point is multibyte, use the C locale's decimal point. FIXME:
3576 add support for multibyte decimal points. */
3577 decimal_point = to_uchar (locale->decimal_point[0]);
3578 if (! decimal_point || locale->decimal_point[1])
3579 decimal_point = '.';
3581 /* FIXME: add support for multibyte thousands separators. */
3582 thousands_sep = to_uchar (*locale->thousands_sep);
3583 if (! thousands_sep || locale->thousands_sep[1])
3584 thousands_sep = -1;
3587 have_read_stdin = false;
3588 inittables ();
3591 size_t i;
3592 static int const sig[] =
3594 /* The usual suspects. */
3595 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
3596 #ifdef SIGPOLL
3597 SIGPOLL,
3598 #endif
3599 #ifdef SIGPROF
3600 SIGPROF,
3601 #endif
3602 #ifdef SIGVTALRM
3603 SIGVTALRM,
3604 #endif
3605 #ifdef SIGXCPU
3606 SIGXCPU,
3607 #endif
3608 #ifdef SIGXFSZ
3609 SIGXFSZ,
3610 #endif
3612 enum { nsigs = ARRAY_CARDINALITY (sig) };
3614 #if SA_NOCLDSTOP
3615 struct sigaction act;
3617 sigemptyset (&caught_signals);
3618 for (i = 0; i < nsigs; i++)
3620 sigaction (sig[i], NULL, &act);
3621 if (act.sa_handler != SIG_IGN)
3622 sigaddset (&caught_signals, sig[i]);
3625 act.sa_handler = sighandler;
3626 act.sa_mask = caught_signals;
3627 act.sa_flags = 0;
3629 for (i = 0; i < nsigs; i++)
3630 if (sigismember (&caught_signals, sig[i]))
3631 sigaction (sig[i], &act, NULL);
3632 #else
3633 for (i = 0; i < nsigs; i++)
3634 if (signal (sig[i], SIG_IGN) != SIG_IGN)
3636 signal (sig[i], sighandler);
3637 siginterrupt (sig[i], 1);
3639 #endif
3641 signal (SIGCHLD, SIG_DFL); /* Don't inherit CHLD handling from parent. */
3643 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
3644 atexit (exit_cleanup);
3646 key_init (&gkey);
3647 gkey.sword = SIZE_MAX;
3649 files = xnmalloc (argc, sizeof *files);
3651 while (true)
3653 /* Parse an operand as a file after "--" was seen; or if
3654 pedantic and a file was seen, unless the POSIX version
3655 predates 1003.1-2001 and -c was not seen and the operand is
3656 "-o FILE" or "-oFILE". */
3657 int oi = -1;
3659 if (c == -1
3660 || (posixly_correct && nfiles != 0
3661 && ! (obsolete_usage
3662 && ! checkonly
3663 && optind != argc
3664 && argv[optind][0] == '-' && argv[optind][1] == 'o'
3665 && (argv[optind][2] || optind + 1 != argc)))
3666 || ((c = getopt_long (argc, argv, short_options,
3667 long_options, &oi))
3668 == -1))
3670 if (argc <= optind)
3671 break;
3672 files[nfiles++] = argv[optind++];
3674 else switch (c)
3676 case 1:
3677 key = NULL;
3678 if (optarg[0] == '+')
3680 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
3681 && ISDIGIT (argv[optind][1]));
3682 obsolete_usage |= minus_pos_usage && !posixly_correct;
3683 if (obsolete_usage)
3685 /* Treat +POS1 [-POS2] as a key if possible; but silently
3686 treat an operand as a file if it is not a valid +POS1. */
3687 key = key_init (&key_buf);
3688 s = parse_field_count (optarg + 1, &key->sword, NULL);
3689 if (s && *s == '.')
3690 s = parse_field_count (s + 1, &key->schar, NULL);
3691 if (! (key->sword || key->schar))
3692 key->sword = SIZE_MAX;
3693 if (! s || *set_ordering (s, key, bl_start))
3694 key = NULL;
3695 else
3697 if (minus_pos_usage)
3699 char const *optarg1 = argv[optind++];
3700 s = parse_field_count (optarg1 + 1, &key->eword,
3701 N_("invalid number after `-'"));
3702 if (*s == '.')
3703 s = parse_field_count (s + 1, &key->echar,
3704 N_("invalid number after `.'"));
3705 if (!key->echar && key->eword)
3707 /* obsolescent syntax +A.x -B.y is equivalent to:
3708 -k A+1.x+1,B.y (when y = 0)
3709 -k A+1.x+1,B+1.y (when y > 0)
3710 So eword is decremented as in the -k case
3711 only when the end field (B) is specified and
3712 echar (y) is 0. */
3713 key->eword--;
3715 if (*set_ordering (s, key, bl_end))
3716 badfieldspec (optarg1,
3717 N_("stray character in field spec"));
3719 key->obsolete_used = true;
3720 insertkey (key);
3724 if (! key)
3725 files[nfiles++] = optarg;
3726 break;
3728 case SORT_OPTION:
3729 c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
3730 /* Fall through. */
3731 case 'b':
3732 case 'd':
3733 case 'f':
3734 case 'g':
3735 case 'h':
3736 case 'i':
3737 case 'M':
3738 case 'n':
3739 case 'r':
3740 case 'R':
3741 case 'V':
3743 char str[2];
3744 str[0] = c;
3745 str[1] = '\0';
3746 set_ordering (str, &gkey, bl_both);
3748 break;
3750 case CHECK_OPTION:
3751 c = (optarg
3752 ? XARGMATCH ("--check", optarg, check_args, check_types)
3753 : 'c');
3754 /* Fall through. */
3755 case 'c':
3756 case 'C':
3757 if (checkonly && checkonly != c)
3758 incompatible_options ("cC");
3759 checkonly = c;
3760 break;
3762 case COMPRESS_PROGRAM_OPTION:
3763 if (compress_program && !STREQ (compress_program, optarg))
3764 error (SORT_FAILURE, 0, _("multiple compress programs specified"));
3765 compress_program = optarg;
3766 break;
3768 case DEBUG_PROGRAM_OPTION:
3769 debug = true;
3770 break;
3772 case FILES0_FROM_OPTION:
3773 files_from = optarg;
3774 break;
3776 case 'k':
3777 key = key_init (&key_buf);
3779 /* Get POS1. */
3780 s = parse_field_count (optarg, &key->sword,
3781 N_("invalid number at field start"));
3782 if (! key->sword--)
3784 /* Provoke with `sort -k0' */
3785 badfieldspec (optarg, N_("field number is zero"));
3787 if (*s == '.')
3789 s = parse_field_count (s + 1, &key->schar,
3790 N_("invalid number after `.'"));
3791 if (! key->schar--)
3793 /* Provoke with `sort -k1.0' */
3794 badfieldspec (optarg, N_("character offset is zero"));
3797 if (! (key->sword || key->schar))
3798 key->sword = SIZE_MAX;
3799 s = set_ordering (s, key, bl_start);
3800 if (*s != ',')
3802 key->eword = SIZE_MAX;
3803 key->echar = 0;
3805 else
3807 /* Get POS2. */
3808 s = parse_field_count (s + 1, &key->eword,
3809 N_("invalid number after `,'"));
3810 if (! key->eword--)
3812 /* Provoke with `sort -k1,0' */
3813 badfieldspec (optarg, N_("field number is zero"));
3815 if (*s == '.')
3817 s = parse_field_count (s + 1, &key->echar,
3818 N_("invalid number after `.'"));
3820 s = set_ordering (s, key, bl_end);
3822 if (*s)
3823 badfieldspec (optarg, N_("stray character in field spec"));
3824 insertkey (key);
3825 break;
3827 case 'm':
3828 mergeonly = true;
3829 break;
3831 case NMERGE_OPTION:
3832 specify_nmerge (oi, c, optarg);
3833 break;
3835 case 'o':
3836 if (outfile && !STREQ (outfile, optarg))
3837 error (SORT_FAILURE, 0, _("multiple output files specified"));
3838 outfile = optarg;
3839 break;
3841 case RANDOM_SOURCE_OPTION:
3842 if (random_source && !STREQ (random_source, optarg))
3843 error (SORT_FAILURE, 0, _("multiple random sources specified"));
3844 random_source = optarg;
3845 break;
3847 case 's':
3848 stable = true;
3849 break;
3851 case 'S':
3852 specify_sort_size (oi, c, optarg);
3853 break;
3855 case 't':
3857 char newtab = optarg[0];
3858 if (! newtab)
3859 error (SORT_FAILURE, 0, _("empty tab"));
3860 if (optarg[1])
3862 if (STREQ (optarg, "\\0"))
3863 newtab = '\0';
3864 else
3866 /* Provoke with `sort -txx'. Complain about
3867 "multi-character tab" instead of "multibyte tab", so
3868 that the diagnostic's wording does not need to be
3869 changed once multibyte characters are supported. */
3870 error (SORT_FAILURE, 0, _("multi-character tab %s"),
3871 quote (optarg));
3874 if (tab != TAB_DEFAULT && tab != newtab)
3875 error (SORT_FAILURE, 0, _("incompatible tabs"));
3876 tab = newtab;
3878 break;
3880 case 'T':
3881 add_temp_dir (optarg);
3882 break;
3884 case 'u':
3885 unique = true;
3886 break;
3888 case 'y':
3889 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3890 through Solaris 7. It is also accepted by many non-Solaris
3891 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3892 -y is marked as obsolete starting with Solaris 8 (1999), but is
3893 still accepted as of Solaris 10 prerelease (2004).
3895 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3896 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3897 and which in general ignores the argument after "-y" if it
3898 consists entirely of digits (it can even be empty). */
3899 if (optarg == argv[optind - 1])
3901 char const *p;
3902 for (p = optarg; ISDIGIT (*p); p++)
3903 continue;
3904 optind -= (*p != '\0');
3906 break;
3908 case 'z':
3909 eolchar = 0;
3910 break;
3912 case_GETOPT_HELP_CHAR;
3914 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3916 default:
3917 usage (SORT_FAILURE);
3921 if (files_from)
3923 FILE *stream;
3925 /* When using --files0-from=F, you may not specify any files
3926 on the command-line. */
3927 if (nfiles)
3929 error (0, 0, _("extra operand %s"), quote (files[0]));
3930 fprintf (stderr, "%s\n",
3931 _("file operands cannot be combined with --files0-from"));
3932 usage (SORT_FAILURE);
3935 if (STREQ (files_from, "-"))
3936 stream = stdin;
3937 else
3939 stream = fopen (files_from, "r");
3940 if (stream == NULL)
3941 error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3942 quote (files_from));
3945 readtokens0_init (&tok);
3947 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3948 error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3949 quote (files_from));
3951 if (tok.n_tok)
3953 size_t i;
3954 free (files);
3955 files = tok.tok;
3956 nfiles = tok.n_tok;
3957 for (i = 0; i < nfiles; i++)
3959 if (STREQ (files[i], "-"))
3960 error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3961 "no file name of %s allowed"),
3962 quote (files[i]));
3963 else if (files[i][0] == '\0')
3965 /* Using the standard `filename:line-number:' prefix here is
3966 not totally appropriate, since NUL is the separator, not NL,
3967 but it might be better than nothing. */
3968 unsigned long int file_number = i + 1;
3969 error (SORT_FAILURE, 0,
3970 _("%s:%lu: invalid zero-length file name"),
3971 quotearg_colon (files_from), file_number);
3975 else
3976 error (SORT_FAILURE, 0, _("no input from %s"),
3977 quote (files_from));
3980 /* Inheritance of global options to individual keys. */
3981 for (key = keylist; key; key = key->next)
3983 if (default_key_compare (key) && !key->reverse)
3985 key->ignore = gkey.ignore;
3986 key->translate = gkey.translate;
3987 key->skipsblanks = gkey.skipsblanks;
3988 key->skipeblanks = gkey.skipeblanks;
3989 key->month = gkey.month;
3990 key->numeric = gkey.numeric;
3991 key->general_numeric = gkey.general_numeric;
3992 key->human_numeric = gkey.human_numeric;
3993 key->version = gkey.version;
3994 key->random = gkey.random;
3995 key->reverse = gkey.reverse;
3998 need_random |= key->random;
4001 if (!keylist && !default_key_compare (&gkey))
4003 gkey_only = true;
4004 insertkey (&gkey);
4005 need_random |= gkey.random;
4008 check_ordering_compatibility ();
4010 /* Disable this combination so that users are less likely
4011 to inadvertantly update a file with debugging enabled.
4012 Also it simplifies the code for handling temp files. */
4013 if (debug && outfile)
4014 error (SORT_FAILURE, 0, _("options -o and --debug are incompatible"));
4016 if (debug)
4018 /* Always output the locale in debug mode, since this
4019 is such a common source of confusion. */
4020 if (hard_LC_COLLATE)
4021 error (0, 0, _("using %s sorting rules"),
4022 quote (setlocale (LC_COLLATE, NULL)));
4023 else
4024 error (0, 0, _("using simple byte comparison"));
4025 key_warnings (&gkey, gkey_only);
4028 reverse = gkey.reverse;
4030 if (need_random)
4032 randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
4033 if (! randread_source)
4034 die (_("open failed"), random_source);
4037 if (temp_dir_count == 0)
4039 char const *tmp_dir = getenv ("TMPDIR");
4040 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
4043 if (nfiles == 0)
4045 static char *minus = (char *) "-";
4046 nfiles = 1;
4047 free (files);
4048 files = &minus;
4051 /* Need to re-check that we meet the minimum requirement for memory
4052 usage with the final value for NMERGE. */
4053 if (0 < sort_size)
4054 sort_size = MAX (sort_size, MIN_SORT_SIZE);
4056 if (checkonly)
4058 if (nfiles > 1)
4059 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
4060 quote (files[1]), checkonly);
4062 if (outfile)
4064 static char opts[] = {0, 'o', 0};
4065 opts[0] = checkonly;
4066 incompatible_options (opts);
4069 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
4070 input is not properly sorted. */
4071 exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
4074 if (mergeonly)
4076 struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
4077 size_t i;
4079 for (i = 0; i < nfiles; ++i)
4080 sortfiles[i].name = files[i];
4082 merge (sortfiles, 0, nfiles, outfile);
4083 IF_LINT (free (sortfiles));
4085 else
4086 sort (files, nfiles, outfile);
4088 if (have_read_stdin && fclose (stdin) == EOF)
4089 die (_("close failed"), "-");
4091 exit (EXIT_SUCCESS);