1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written December 1988 by Mike Haertel.
18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19 or (US mail) as Mike Haertel c/o Free Software Foundation.
21 Ørn E. Hansen added NLS support in 1997. */
26 #include <sys/types.h>
32 #include "filevercmp.h"
33 #include "hard-locale.h"
35 #include "ignore-value.h"
43 #include "readtokens0.h"
46 #include "strnumcmp.h"
49 #include "xnanosleep.h"
52 #if HAVE_SYS_RESOURCE_H
53 # include <sys/resource.h>
56 struct rlimit
{ size_t rlim_cur
; };
57 # define getrlimit(Resource, Rlp) (-1)
60 /* The official name of this program (e.g., no `g' prefix). */
61 #define PROGRAM_NAME "sort"
64 proper_name ("Mike Haertel"), \
65 proper_name ("Paul Eggert")
67 #if HAVE_LANGINFO_CODESET
68 # include <langinfo.h>
71 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
74 # define SA_NOCLDSTOP 0
75 /* No sigprocmask. Always 'return' zero. */
76 # define sigprocmask(How, Set, Oset) (0)
78 # if ! HAVE_SIGINTERRUPT
79 # define siginterrupt(sig, flag) /* empty */
83 #if !defined OPEN_MAX && defined NR_OPEN
84 # define OPEN_MAX NR_OPEN
90 #define UCHAR_LIM (UCHAR_MAX + 1)
92 #ifndef DEFAULT_TMPDIR
93 # define DEFAULT_TMPDIR "/tmp"
99 /* POSIX says to exit with status 1 if invoked with -c and the
100 input is not properly sorted. */
101 SORT_OUT_OF_ORDER
= 1,
103 /* POSIX says any other irregular exit must exit with a status
104 code greater than 1. */
110 /* The number of times we should try to fork a compression process
111 (we retry if the fork call fails). We don't _need_ to compress
112 temp files, this is just to reduce disk access, so this number
113 can be small. Each retry doubles in duration. */
114 MAX_FORK_TRIES_COMPRESS
= 4,
116 /* The number of times we should try to fork a decompression process.
117 If we can't fork a decompression process, we can't sort, so this
118 number should be big. Each retry doubles in duration. */
119 MAX_FORK_TRIES_DECOMPRESS
= 9
122 /* The representation of the decimal point in the current locale. */
123 static int decimal_point
;
125 /* Thousands separator; if -1, then there isn't one. */
126 static int thousands_sep
;
128 /* Nonzero if the corresponding locales are hard. */
129 static bool hard_LC_COLLATE
;
131 static bool hard_LC_TIME
;
134 #define NONZERO(x) ((x) != 0)
136 /* The kind of blanks for '-b' to skip in various options. */
137 enum blanktype
{ bl_start
, bl_end
, bl_both
};
139 /* The character marking end of line. Default to \n. */
140 static char eolchar
= '\n';
142 /* Lines are held in core as counted strings. */
145 char *text
; /* Text of the line. */
146 size_t length
; /* Length including final newline. */
147 char *keybeg
; /* Start of first key. */
148 char *keylim
; /* Limit of first key. */
154 char *buf
; /* Dynamically allocated buffer,
155 partitioned into 3 regions:
158 - an array of lines, in reverse order. */
159 size_t used
; /* Number of bytes used for input data. */
160 size_t nlines
; /* Number of lines in the line array. */
161 size_t alloc
; /* Number of bytes allocated. */
162 size_t left
; /* Number of bytes left from previous reads. */
163 size_t line_bytes
; /* Number of bytes to reserve for each line. */
164 bool eof
; /* An EOF has been read. */
169 size_t sword
; /* Zero-origin 'word' to start at. */
170 size_t schar
; /* Additional characters to skip. */
171 size_t eword
; /* Zero-origin last 'word' of key. */
172 size_t echar
; /* Additional characters in field. */
173 bool const *ignore
; /* Boolean array of characters to ignore. */
174 char const *translate
; /* Translation applied to characters. */
175 bool skipsblanks
; /* Skip leading blanks when finding start. */
176 bool skipeblanks
; /* Skip leading blanks when finding end. */
177 bool numeric
; /* Flag for numeric comparison. Handle
178 strings of digits with optional decimal
179 point, but no exponential notation. */
180 bool random
; /* Sort by random hash of key. */
181 bool general_numeric
; /* Flag for general, numeric comparison.
182 Handle numbers in exponential notation. */
183 bool human_numeric
; /* Flag for sorting by human readable
184 units with either SI xor IEC prefixes. */
185 int iec_present
; /* Flag for checking for mixed SI and IEC. */
186 bool month
; /* Flag for comparison by month name. */
187 bool reverse
; /* Reverse the sense of comparison. */
188 bool version
; /* sort by version number */
189 bool obsolete_used
; /* obsolescent key option format is used. */
190 struct keyfield
*next
; /* Next keyfield to try. */
199 /* FIXME: None of these tables work with multibyte character sets.
200 Also, there are many other bugs when handling multibyte characters.
201 One way to fix this is to rewrite `sort' to use wide characters
202 internally, but doing this with good performance is a bit
205 /* Table of blanks. */
206 static bool blanks
[UCHAR_LIM
];
208 /* Table of non-printing characters. */
209 static bool nonprinting
[UCHAR_LIM
];
211 /* Table of non-dictionary characters (not letters, digits, or blanks). */
212 static bool nondictionary
[UCHAR_LIM
];
214 /* Translation table folding lower case to upper. */
215 static unsigned char fold_toupper
[UCHAR_LIM
];
217 #define MONTHS_PER_YEAR 12
219 /* Table mapping month names to integers.
220 Alphabetic order allows binary search. */
221 static struct month monthtab
[] =
237 /* During the merge phase, the number of files to merge at once. */
238 #define NMERGE_DEFAULT 16
240 /* Minimum size for a merge or check buffer. */
241 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
243 /* Minimum sort size; the code might not work with smaller sizes. */
244 #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
246 /* The number of bytes needed for a merge or check buffer, which can
247 function relatively efficiently even if it holds only one line. If
248 a longer line is seen, this value is increased. */
249 static size_t merge_buffer_size
= MAX (MIN_MERGE_BUFFER_SIZE
, 256 * 1024);
251 /* The approximate maximum number of bytes of main memory to use, as
252 specified by the user. Zero if the user has not specified a size. */
253 static size_t sort_size
;
255 /* The guessed size for non-regular files. */
256 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
258 /* Array of directory names in which any temporary files are to be created. */
259 static char const **temp_dirs
;
261 /* Number of temporary directory names used. */
262 static size_t temp_dir_count
;
264 /* Number of allocated slots in temp_dirs. */
265 static size_t temp_dir_alloc
;
267 /* Flag to reverse the order of all comparisons. */
270 /* Flag for stable sort. This turns off the last ditch bytewise
271 comparison of lines, and instead leaves lines in the same order
272 they were read if all keys compare equal. */
275 /* If TAB has this value, blanks separate fields. */
276 enum { TAB_DEFAULT
= CHAR_MAX
+ 1 };
278 /* Tab character separating fields. If TAB_DEFAULT, then fields are
279 separated by the empty string between a non-blank character and a blank
281 static int tab
= TAB_DEFAULT
;
283 /* Flag to remove consecutive duplicate lines from the output.
284 Only the last of a sequence of equal lines will be output. */
287 /* Nonzero if any of the input files are the standard input. */
288 static bool have_read_stdin
;
290 /* List of key field comparisons to be tried. */
291 static struct keyfield
*keylist
;
293 /* Program used to (de)compress temp files. Must accept -d. */
294 static char const *compress_program
;
296 /* Annotate the output with extra info to aid the user. */
299 /* Maximum number of files to merge in one go. If more than this
300 number are present, temp files will be used. */
301 static unsigned int nmerge
= NMERGE_DEFAULT
;
303 static void sortlines_temp (struct line
*, size_t, struct line
*);
305 /* Report MESSAGE for FILE, then clean up and exit.
306 If FILE is null, it represents standard output. */
308 static void die (char const *, char const *) ATTRIBUTE_NORETURN
;
310 die (char const *message
, char const *file
)
312 error (0, errno
, "%s: %s", message
, file
? file
: _("standard output"));
319 if (status
!= EXIT_SUCCESS
)
320 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
325 Usage: %s [OPTION]... [FILE]...\n\
326 or: %s [OPTION]... --files0-from=F\n\
328 program_name
, program_name
);
330 Write sorted concatenation of all FILE(s) to standard output.\n\
334 Mandatory arguments to long options are mandatory for short options too.\n\
341 -b, --ignore-leading-blanks ignore leading blanks\n\
342 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
343 -f, --ignore-case fold lower case to upper case characters\n\
346 -g, --general-numeric-sort compare according to general numerical value\n\
347 -i, --ignore-nonprinting consider only printable characters\n\
348 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
351 -h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)\n\
354 -n, --numeric-sort compare according to string numerical value\n\
355 -R, --random-sort sort by random hash of keys\n\
356 --random-source=FILE get random bytes from FILE\n\
357 -r, --reverse reverse the result of comparisons\n\
360 --sort=WORD sort according to WORD:\n\
361 general-numeric -g, human-numeric -h, month -M,\n\
362 numeric -n, random -R, version -V\n\
363 -V, --version-sort natural sort of (version) numbers within text\n\
371 --batch-size=NMERGE merge at most NMERGE inputs at once;\n\
372 for more use temp files\n\
375 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
376 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
377 --compress-program=PROG compress temporaries with PROG;\n\
378 decompress them with PROG -d\n\
381 --debug annotate the part of the line used to sort,\n\
382 and warn about questionable usage to stderr\n\
383 --files0-from=F read input from the files specified by\n\
384 NUL-terminated names in file F;\n\
385 If F is - then read names from standard input\n\
388 -k, --key=POS1[,POS2] start a key at POS1 (origin 1), end it at POS2\n\
389 (default end of line)\n\
390 -m, --merge merge already sorted files; do not sort\n\
393 -o, --output=FILE write result to FILE instead of standard output\n\
394 -s, --stable stabilize sort by disabling last-resort comparison\n\
395 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
398 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
399 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
400 multiple options specify multiple directories\n\
401 -u, --unique with -c, check for strict ordering;\n\
402 without -c, output only the first of an equal run\n\
405 -z, --zero-terminated end lines with 0 byte, not newline\n\
407 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
408 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
411 POS is F[.C][OPTS], where F is the field number and C the character position\n\
412 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
413 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
414 one or more single-letter ordering options, which override global ordering\n\
415 options for that key. If no key is given, use the entire line as the key.\n\
417 SIZE may be followed by the following multiplicative suffixes:\n\
420 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
422 With no FILE, or when FILE is -, read standard input.\n\
425 The locale specified by the environment affects sort order.\n\
426 Set LC_ALL=C to get the traditional sort order that uses\n\
427 native byte values.\n\
429 emit_ancillary_info ();
435 /* For long options that have no equivalent short option, use a
436 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
439 CHECK_OPTION
= CHAR_MAX
+ 1,
440 COMPRESS_PROGRAM_OPTION
,
441 DEBUG_PROGRAM_OPTION
,
444 RANDOM_SOURCE_OPTION
,
448 static char const short_options
[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";
450 static struct option
const long_options
[] =
452 {"ignore-leading-blanks", no_argument
, NULL
, 'b'},
453 {"check", optional_argument
, NULL
, CHECK_OPTION
},
454 {"compress-program", required_argument
, NULL
, COMPRESS_PROGRAM_OPTION
},
455 {"debug", no_argument
, NULL
, DEBUG_PROGRAM_OPTION
},
456 {"dictionary-order", no_argument
, NULL
, 'd'},
457 {"ignore-case", no_argument
, NULL
, 'f'},
458 {"files0-from", required_argument
, NULL
, FILES0_FROM_OPTION
},
459 {"general-numeric-sort", no_argument
, NULL
, 'g'},
460 {"ignore-nonprinting", no_argument
, NULL
, 'i'},
461 {"key", required_argument
, NULL
, 'k'},
462 {"merge", no_argument
, NULL
, 'm'},
463 {"month-sort", no_argument
, NULL
, 'M'},
464 {"numeric-sort", no_argument
, NULL
, 'n'},
465 {"human-numeric-sort", no_argument
, NULL
, 'h'},
466 {"version-sort", no_argument
, NULL
, 'V'},
467 {"random-sort", no_argument
, NULL
, 'R'},
468 {"random-source", required_argument
, NULL
, RANDOM_SOURCE_OPTION
},
469 {"sort", required_argument
, NULL
, SORT_OPTION
},
470 {"output", required_argument
, NULL
, 'o'},
471 {"reverse", no_argument
, NULL
, 'r'},
472 {"stable", no_argument
, NULL
, 's'},
473 {"batch-size", required_argument
, NULL
, NMERGE_OPTION
},
474 {"buffer-size", required_argument
, NULL
, 'S'},
475 {"field-separator", required_argument
, NULL
, 't'},
476 {"temporary-directory", required_argument
, NULL
, 'T'},
477 {"unique", no_argument
, NULL
, 'u'},
478 {"zero-terminated", no_argument
, NULL
, 'z'},
479 {GETOPT_HELP_OPTION_DECL
},
480 {GETOPT_VERSION_OPTION_DECL
},
484 #define CHECK_TABLE \
486 _ct_("silent", 'C') \
487 _ct_("diagnose-first", 'c')
489 static char const *const check_args
[] =
491 #define _ct_(_s, _c) _s,
495 static char const check_types
[] =
497 #define _ct_(_s, _c) _c,
503 _st_("general-numeric", 'g') \
504 _st_("human-numeric", 'h') \
506 _st_("numeric", 'n') \
507 _st_("random", 'R') \
510 static char const *const sort_args
[] =
512 #define _st_(_s, _c) _s,
516 static char const sort_types
[] =
518 #define _st_(_s, _c) _c,
523 /* The set of signals that are caught. */
524 static sigset_t caught_signals
;
526 /* Critical section status. */
533 /* Enter a critical section. */
534 static struct cs_status
537 struct cs_status status
;
538 status
.valid
= (sigprocmask (SIG_BLOCK
, &caught_signals
, &status
.sigs
) == 0);
542 /* Leave a critical section. */
544 cs_leave (struct cs_status status
)
548 /* Ignore failure when restoring the signal mask. */
549 sigprocmask (SIG_SETMASK
, &status
.sigs
, NULL
);
553 /* The list of temporary files. */
556 struct tempnode
*volatile next
;
557 pid_t pid
; /* If compressed, the pid of compressor, else zero */
558 char name
[1]; /* Actual size is 1 + file name length. */
560 static struct tempnode
*volatile temphead
;
561 static struct tempnode
*volatile *temptail
= &temphead
;
566 pid_t pid
; /* If compressed, the pid of compressor, else zero */
569 /* A table where we store compression process states. We clean up all
570 processes in a timely manner so as not to exhaust system resources,
571 so we store the info on whether the process is still running, or has
573 static Hash_table
*proctab
;
575 enum { INIT_PROCTAB_SIZE
= 47 };
577 enum procstate
{ ALIVE
, ZOMBIE
};
579 /* A proctab entry. The COUNT field is there in case we fork a new
580 compression process that has the same PID as an old zombie process
581 that is still in the table (because the process to decompress the
582 temp file it was associated with hasn't started yet). */
586 enum procstate state
;
591 proctab_hasher (const void *entry
, size_t tabsize
)
593 const struct procnode
*node
= entry
;
594 return node
->pid
% tabsize
;
598 proctab_comparator (const void *e1
, const void *e2
)
600 const struct procnode
*n1
= e1
, *n2
= e2
;
601 return n1
->pid
== n2
->pid
;
604 /* The total number of forked processes (compressors and decompressors)
605 that have not been reaped yet. */
606 static size_t nprocs
;
608 /* The number of child processes we'll allow before we try to reap some. */
609 enum { MAX_PROCS_BEFORE_REAP
= 2 };
611 /* If 0 < PID, wait for the child process with that PID to exit.
612 If PID is -1, clean up a random child process which has finished and
613 return the process ID of that child. If PID is -1 and no processes
614 have quit yet, return 0 without waiting. */
620 pid_t cpid
= waitpid (pid
, &status
, pid
< 0 ? WNOHANG
: 0);
623 error (SORT_FAILURE
, errno
, _("waiting for %s [-d]"),
627 if (! WIFEXITED (status
) || WEXITSTATUS (status
))
628 error (SORT_FAILURE
, 0, _("%s [-d] terminated abnormally"),
636 /* Add the PID of a running compression process to proctab, or update
637 the entry COUNT and STATE fields if it's already there. This also
638 creates the table for us the first time it's called. */
641 register_proc (pid_t pid
)
643 struct procnode test
, *node
;
647 proctab
= hash_initialize (INIT_PROCTAB_SIZE
, NULL
,
656 node
= hash_lookup (proctab
, &test
);
664 node
= xmalloc (sizeof *node
);
668 if (hash_insert (proctab
, node
) == NULL
)
673 /* This is called when we reap a random process. We don't know
674 whether we have reaped a compression process or a decompression
675 process until we look in the table. If there's an ALIVE entry for
676 it, then we have reaped a compression process, so change the state
677 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
680 update_proc (pid_t pid
)
682 struct procnode test
, *node
;
685 node
= hash_lookup (proctab
, &test
);
687 node
->state
= ZOMBIE
;
690 /* This is for when we need to wait for a compression process to exit.
691 If it has a ZOMBIE entry in the table then it's already dead and has
692 been reaped. Note that if there's an ALIVE entry for it, it still may
693 already have died and been reaped if a second process was created with
694 the same PID. This is probably exceedingly rare, but to be on the safe
695 side we will have to wait for any compression process with this PID. */
698 wait_proc (pid_t pid
)
700 struct procnode test
, *node
;
703 node
= hash_lookup (proctab
, &test
);
704 if (node
->state
== ALIVE
)
707 node
->state
= ZOMBIE
;
710 hash_delete (proctab
, node
);
715 /* Keep reaping finished children as long as there are more to reap.
716 This doesn't block waiting for any of them, it only reaps those
717 that are already dead. */
724 while (0 < nprocs
&& (pid
= reap (-1)))
728 /* Clean up any remaining temporary files. */
733 struct tempnode
const *node
;
735 for (node
= temphead
; node
; node
= node
->next
)
740 /* Cleanup actions to take when exiting. */
747 /* Clean up any remaining temporary files in a critical section so
748 that a signal handler does not try to clean them too. */
749 struct cs_status cs
= cs_enter ();
757 /* Create a new temporary file, returning its newly allocated tempnode.
758 Store into *PFD the file descriptor open for writing.
759 If the creation fails, return NULL and store -1 into *PFD if the
760 failure is due to file descriptor exhaustion and
761 SURVIVE_FD_EXHAUSTION; otherwise, die. */
763 static struct tempnode
*
764 create_temp_file (int *pfd
, bool survive_fd_exhaustion
)
766 static char const slashbase
[] = "/sortXXXXXX";
767 static size_t temp_dir_index
;
770 char const *temp_dir
= temp_dirs
[temp_dir_index
];
771 size_t len
= strlen (temp_dir
);
772 struct tempnode
*node
=
773 xmalloc (offsetof (struct tempnode
, name
) + len
+ sizeof slashbase
);
774 char *file
= node
->name
;
777 memcpy (file
, temp_dir
, len
);
778 memcpy (file
+ len
, slashbase
, sizeof slashbase
);
781 if (++temp_dir_index
== temp_dir_count
)
784 /* Create the temporary file in a critical section, to avoid races. */
790 temptail
= &node
->next
;
798 if (! (survive_fd_exhaustion
&& errno
== EMFILE
))
799 error (SORT_FAILURE
, errno
, _("cannot create temporary file in %s"),
809 /* Predeclare an access pattern for input files.
810 Ignore any errors -- this is only advisory.
812 There are a few hints we could possibly provide,
813 and after careful testing it was decided that
814 specifying POSIX_FADV_SEQUENTIAL was not detrimental
815 to any cases. On Linux 2.6.31, this option doubles
816 the size of read ahead performed and thus was seen to
819 Sorting with a smaller internal buffer
820 Reading from faster flash devices
822 In _addition_ one could also specify other hints...
824 POSIX_FADV_WILLNEED was tested, but Linux 2.6.31
825 at least uses that to _synchronously_ prepopulate the cache
826 with the specified range. While sort does need to
827 read all of its input before outputting, a synchronous
828 read of the whole file up front precludes any processing
829 that sort could do in parallel with the system doing
830 read ahead of the data. This was seen to have negative effects
831 in a couple of cases:
833 Sorting with a smaller internal buffer
834 Note this option was seen to shorten the runtime for sort
835 on a multicore system with lots of RAM and other processes
836 competing for CPU. It could be argued that more explicit
837 scheduling hints with `nice` et. al. are more appropriate
840 POSIX_FADV_NOREUSE is a possibility as it could lower
841 the priority of input data in the cache as sort will
842 only need to process it once. However its functionality
843 has changed over Linux kernel versions and as of 2.6.31
844 it does nothing and thus we can't depend on what it might
847 POSIX_FADV_DONTNEED is not appropriate for user specified
848 input files, but for temp files we do want to drop the
849 cache immediately after processing. This is done implicitly
850 however when the files are unlinked. */
853 fadvise_input (FILE *fp
)
855 #if HAVE_POSIX_FADVISE
858 int fd
= fileno (fp
);
859 ignore_value (posix_fadvise (fd
, 0, 0, POSIX_FADV_SEQUENTIAL
));
864 /* Return a stream for FILE, opened with mode HOW. A null FILE means
865 standard output; HOW should be "w". When opening for input, "-"
866 means standard input. To avoid confusion, do not return file
867 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
868 opening an ordinary FILE. Return NULL if unsuccessful. */
871 stream_open (const char *file
, const char *how
)
878 if (STREQ (file
, "-"))
880 have_read_stdin
= true;
884 fp
= fopen (file
, how
);
888 return fopen (file
, how
);
891 /* Same as stream_open, except always return a non-null value; die on
895 xfopen (const char *file
, const char *how
)
897 FILE *fp
= stream_open (file
, how
);
899 die (_("open failed"), file
);
903 /* Close FP, whose name is FILE, and report any errors. */
906 xfclose (FILE *fp
, char const *file
)
911 /* Allow reading stdin from tty more than once. */
917 /* Don't close stdout just yet. close_stdout does that. */
918 if (fflush (fp
) != 0)
919 die (_("fflush failed"), file
);
923 if (fclose (fp
) != 0)
924 die (_("close failed"), file
);
930 dup2_or_die (int oldfd
, int newfd
)
932 if (dup2 (oldfd
, newfd
) < 0)
933 error (SORT_FAILURE
, errno
, _("dup2 failed"));
936 /* Fork a child process for piping to and do common cleanup. The
937 TRIES parameter tells us how many times to try to fork before
938 giving up. Return the PID of the child, or -1 (setting errno)
942 pipe_fork (int pipefds
[2], size_t tries
)
944 #if HAVE_WORKING_FORK
945 struct tempnode
*saved_temphead
;
947 double wait_retry
= 0.25;
948 pid_t pid
IF_LINT (= -1);
951 if (pipe (pipefds
) < 0)
956 /* This is so the child process won't delete our temp files
957 if it receives a signal before exec-ing. */
959 saved_temphead
= temphead
;
965 temphead
= saved_temphead
;
970 if (0 <= pid
|| errno
!= EAGAIN
)
974 xnanosleep (wait_retry
);
989 close (STDIN_FILENO
);
990 close (STDOUT_FILENO
);
997 #else /* ! HAVE_WORKING_FORK */
1002 /* Create a temporary file and start a compression program to filter output
1003 to that file. Set *PFP to the file handle and if PPID is non-NULL,
1004 set *PPID to the PID of the newly-created process. If the creation
1005 fails, return NULL if the failure is due to file descriptor
1006 exhaustion and SURVIVE_FD_EXHAUSTION; otherwise, die. */
1009 maybe_create_temp (FILE **pfp
, pid_t
*ppid
, bool survive_fd_exhaustion
)
1012 struct tempnode
*node
= create_temp_file (&tempfd
, survive_fd_exhaustion
);
1020 if (compress_program
)
1024 node
->pid
= pipe_fork (pipefds
, MAX_FORK_TRIES_COMPRESS
);
1029 tempfd
= pipefds
[1];
1031 register_proc (node
->pid
);
1033 else if (node
->pid
== 0)
1036 dup2_or_die (tempfd
, STDOUT_FILENO
);
1038 dup2_or_die (pipefds
[0], STDIN_FILENO
);
1041 if (execlp (compress_program
, compress_program
, (char *) NULL
) < 0)
1042 error (SORT_FAILURE
, errno
, _("couldn't execute %s"),
1049 *pfp
= fdopen (tempfd
, "w");
1051 die (_("couldn't create temporary file"), name
);
1059 /* Create a temporary file and start a compression program to filter output
1060 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
1061 set it to the PID of the newly-created process. Die on failure. */
1064 create_temp (FILE **pfp
, pid_t
*ppid
)
1066 return maybe_create_temp (pfp
, ppid
, false);
1069 /* Open a compressed temp file and start a decompression process through
1070 which to filter the input. PID must be the valid processes ID of the
1071 process used to compress the file. Return NULL (setting errno to
1072 EMFILE) if we ran out of file descriptors, and die on any other
1076 open_temp (const char *name
, pid_t pid
)
1078 int tempfd
, pipefds
[2];
1083 tempfd
= open (name
, O_RDONLY
);
1087 switch (pipe_fork (pipefds
, MAX_FORK_TRIES_DECOMPRESS
))
1090 if (errno
!= EMFILE
)
1091 error (SORT_FAILURE
, errno
, _("couldn't create process for %s -d"),
1099 dup2_or_die (tempfd
, STDIN_FILENO
);
1101 dup2_or_die (pipefds
[1], STDOUT_FILENO
);
1104 execlp (compress_program
, compress_program
, "-d", (char *) NULL
);
1105 error (SORT_FAILURE
, errno
, _("couldn't execute %s -d"),
1112 fp
= fdopen (pipefds
[0], "r");
1115 int saved_errno
= errno
;
1117 errno
= saved_errno
;
1125 /* Append DIR to the array of temporary directory names. */
1127 add_temp_dir (char const *dir
)
1129 if (temp_dir_count
== temp_dir_alloc
)
1130 temp_dirs
= X2NREALLOC (temp_dirs
, &temp_dir_alloc
);
1132 temp_dirs
[temp_dir_count
++] = dir
;
1135 /* Remove NAME from the list of temporary files. */
1138 zaptemp (const char *name
)
1140 struct tempnode
*volatile *pnode
;
1141 struct tempnode
*node
;
1142 struct tempnode
*next
;
1144 int unlink_errno
= 0;
1145 struct cs_status cs
;
1147 for (pnode
= &temphead
; (node
= *pnode
)->name
!= name
; pnode
= &node
->next
)
1150 /* Unlink the temporary file in a critical section to avoid races. */
1153 unlink_status
= unlink (name
);
1154 unlink_errno
= errno
;
1158 if (unlink_status
!= 0)
1159 error (0, unlink_errno
, _("warning: cannot remove: %s"), name
);
1165 #if HAVE_NL_LANGINFO
1168 struct_month_cmp (const void *m1
, const void *m2
)
1170 struct month
const *month1
= m1
;
1171 struct month
const *month2
= m2
;
1172 return strcmp (month1
->name
, month2
->name
);
1177 /* Initialize the character class tables. */
1184 for (i
= 0; i
< UCHAR_LIM
; ++i
)
1186 blanks
[i
] = !! isblank (i
);
1187 nonprinting
[i
] = ! isprint (i
);
1188 nondictionary
[i
] = ! isalnum (i
) && ! isblank (i
);
1189 fold_toupper
[i
] = toupper (i
);
1192 #if HAVE_NL_LANGINFO
1193 /* If we're not in the "C" locale, read different names for months. */
1196 for (i
= 0; i
< MONTHS_PER_YEAR
; i
++)
1203 s
= (char *) nl_langinfo (ABMON_1
+ i
);
1205 monthtab
[i
].name
= name
= xmalloc (s_len
+ 1);
1206 monthtab
[i
].val
= i
+ 1;
1208 for (j
= k
= 0; j
< s_len
; j
++)
1209 if (! isblank (to_uchar (s
[j
])))
1210 name
[k
++] = fold_toupper
[to_uchar (s
[j
])];
1213 qsort ((void *) monthtab
, MONTHS_PER_YEAR
,
1214 sizeof *monthtab
, struct_month_cmp
);
1219 /* Specify how many inputs may be merged at once.
1220 This may be set on the command-line with the
1221 --batch-size option. */
1223 specify_nmerge (int oi
, char c
, char const *s
)
1226 struct rlimit rlimit
;
1227 enum strtol_error e
= xstrtoumax (s
, NULL
, 10, &n
, NULL
);
1229 /* Try to find out how many file descriptors we'll be able
1230 to open. We need at least nmerge + 3 (STDIN_FILENO,
1231 STDOUT_FILENO and STDERR_FILENO). */
1232 unsigned int max_nmerge
= ((getrlimit (RLIMIT_NOFILE
, &rlimit
) == 0
1237 if (e
== LONGINT_OK
)
1241 e
= LONGINT_OVERFLOW
;
1246 error (0, 0, _("invalid --%s argument %s"),
1247 long_options
[oi
].name
, quote (s
));
1248 error (SORT_FAILURE
, 0,
1249 _("minimum --%s argument is %s"),
1250 long_options
[oi
].name
, quote ("2"));
1252 else if (max_nmerge
< nmerge
)
1254 e
= LONGINT_OVERFLOW
;
1261 if (e
== LONGINT_OVERFLOW
)
1263 char max_nmerge_buf
[INT_BUFSIZE_BOUND (unsigned int)];
1264 error (0, 0, _("--%s argument %s too large"),
1265 long_options
[oi
].name
, quote (s
));
1266 error (SORT_FAILURE
, 0,
1267 _("maximum --%s argument with current rlimit is %s"),
1268 long_options
[oi
].name
,
1269 uinttostr (max_nmerge
, max_nmerge_buf
));
1272 xstrtol_fatal (e
, oi
, c
, long_options
, s
);
1275 /* Specify the amount of main memory to use when sorting. */
1277 specify_sort_size (int oi
, char c
, char const *s
)
1281 enum strtol_error e
= xstrtoumax (s
, &suffix
, 10, &n
, "EgGkKmMPtTYZ");
1283 /* The default unit is KiB. */
1284 if (e
== LONGINT_OK
&& ISDIGIT (suffix
[-1]))
1286 if (n
<= UINTMAX_MAX
/ 1024)
1289 e
= LONGINT_OVERFLOW
;
1292 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1293 if (e
== LONGINT_INVALID_SUFFIX_CHAR
&& ISDIGIT (suffix
[-1]) && ! suffix
[1])
1302 double mem
= physmem_total () * n
/ 100;
1304 /* Use "<", not "<=", to avoid problems with rounding. */
1305 if (mem
< UINTMAX_MAX
)
1311 e
= LONGINT_OVERFLOW
;
1316 if (e
== LONGINT_OK
)
1318 /* If multiple sort sizes are specified, take the maximum, so
1319 that option order does not matter. */
1326 sort_size
= MAX (sort_size
, MIN_SORT_SIZE
);
1330 e
= LONGINT_OVERFLOW
;
1333 xstrtol_fatal (e
, oi
, c
, long_options
, s
);
1336 /* Return the default sort size. */
1338 default_sort_size (void)
1340 /* Let MEM be available memory or 1/8 of total memory, whichever
1342 double avail
= physmem_available ();
1343 double total
= physmem_total ();
1344 double mem
= MAX (avail
, total
/ 8);
1345 struct rlimit rlimit
;
1347 /* Let SIZE be MEM, but no more than the maximum object size or
1348 system resource limits. Avoid the MIN macro here, as it is not
1349 quite right when only one argument is floating point. Don't
1350 bother to check for values like RLIM_INFINITY since in practice
1351 they are not much less than SIZE_MAX. */
1352 size_t size
= SIZE_MAX
;
1355 if (getrlimit (RLIMIT_DATA
, &rlimit
) == 0 && rlimit
.rlim_cur
< size
)
1356 size
= rlimit
.rlim_cur
;
1358 if (getrlimit (RLIMIT_AS
, &rlimit
) == 0 && rlimit
.rlim_cur
< size
)
1359 size
= rlimit
.rlim_cur
;
1362 /* Leave a large safety margin for the above limits, as failure can
1363 occur when they are exceeded. */
1367 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1368 Exceeding RSS is not fatal, but can be quite slow. */
1369 if (getrlimit (RLIMIT_RSS
, &rlimit
) == 0 && rlimit
.rlim_cur
/ 16 * 15 < size
)
1370 size
= rlimit
.rlim_cur
/ 16 * 15;
1373 /* Use no less than the minimum. */
1374 return MAX (size
, MIN_SORT_SIZE
);
1377 /* Return the sort buffer size to use with the input files identified
1378 by FPS and FILES, which are alternate names of the same files.
1379 NFILES gives the number of input files; NFPS may be less. Assume
1380 that each input line requires LINE_BYTES extra bytes' worth of line
1381 information. Do not exceed the size bound specified by the user
1382 (or a default size bound, if the user does not specify one). */
1385 sort_buffer_size (FILE *const *fps
, size_t nfps
,
1386 char *const *files
, size_t nfiles
,
1389 /* A bound on the input size. If zero, the bound hasn't been
1391 static size_t size_bound
;
1393 /* In the worst case, each input byte is a newline. */
1394 size_t worst_case_per_input_byte
= line_bytes
+ 1;
1396 /* Keep enough room for one extra input line and an extra byte.
1397 This extra room might be needed when preparing to read EOF. */
1398 size_t size
= worst_case_per_input_byte
+ 1;
1402 for (i
= 0; i
< nfiles
; i
++)
1408 if ((i
< nfps
? fstat (fileno (fps
[i
]), &st
)
1409 : STREQ (files
[i
], "-") ? fstat (STDIN_FILENO
, &st
)
1410 : stat (files
[i
], &st
))
1412 die (_("stat failed"), files
[i
]);
1414 if (S_ISREG (st
.st_mode
))
1415 file_size
= st
.st_size
;
1418 /* The file has unknown size. If the user specified a sort
1419 buffer size, use that; otherwise, guess the size. */
1422 file_size
= INPUT_FILE_SIZE_GUESS
;
1427 size_bound
= sort_size
;
1429 size_bound
= default_sort_size ();
1432 /* Add the amount of memory needed to represent the worst case
1433 where the input consists entirely of newlines followed by a
1434 single non-newline. Check for overflow. */
1435 worst_case
= file_size
* worst_case_per_input_byte
+ 1;
1436 if (file_size
!= worst_case
/ worst_case_per_input_byte
1437 || size_bound
- size
<= worst_case
)
1445 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1446 must be at least sizeof (struct line). Allocate ALLOC bytes
1450 initbuf (struct buffer
*buf
, size_t line_bytes
, size_t alloc
)
1452 /* Ensure that the line array is properly aligned. If the desired
1453 size cannot be allocated, repeatedly halve it until allocation
1454 succeeds. The smaller allocation may hurt overall performance,
1455 but that's better than failing. */
1458 alloc
+= sizeof (struct line
) - alloc
% sizeof (struct line
);
1459 buf
->buf
= malloc (alloc
);
1463 if (alloc
<= line_bytes
+ 1)
1467 buf
->line_bytes
= line_bytes
;
1469 buf
->used
= buf
->left
= buf
->nlines
= 0;
1473 /* Return one past the limit of the line array. */
1475 static inline struct line
*
1476 buffer_linelim (struct buffer
const *buf
)
1478 return (struct line
*) (buf
->buf
+ buf
->alloc
);
1481 /* Return a pointer to the first character of the field specified
1485 begfield (const struct line
*line
, const struct keyfield
*key
)
1487 char *ptr
= line
->text
, *lim
= ptr
+ line
->length
- 1;
1488 size_t sword
= key
->sword
;
1489 size_t schar
= key
->schar
;
1491 /* The leading field separator itself is included in a field when -t
1494 if (tab
!= TAB_DEFAULT
)
1495 while (ptr
< lim
&& sword
--)
1497 while (ptr
< lim
&& *ptr
!= tab
)
1503 while (ptr
< lim
&& sword
--)
1505 while (ptr
< lim
&& blanks
[to_uchar (*ptr
)])
1507 while (ptr
< lim
&& !blanks
[to_uchar (*ptr
)])
1511 /* If we're ignoring leading blanks when computing the Start
1512 of the field, skip past them here. */
1513 if (key
->skipsblanks
)
1514 while (ptr
< lim
&& blanks
[to_uchar (*ptr
)])
1517 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1518 ptr
= MIN (lim
, ptr
+ schar
);
1523 /* Return the limit of (a pointer to the first character after) the field
1524 in LINE specified by KEY. */
1527 limfield (const struct line
*line
, const struct keyfield
*key
)
1529 char *ptr
= line
->text
, *lim
= ptr
+ line
->length
- 1;
1530 size_t eword
= key
->eword
, echar
= key
->echar
;
1533 eword
++; /* Skip all of end field. */
1535 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1536 whichever comes first. If there are more than EWORD fields, leave
1537 PTR pointing at the beginning of the field having zero-based index,
1538 EWORD. If a delimiter character was specified (via -t), then that
1539 `beginning' is the first character following the delimiting TAB.
1540 Otherwise, leave PTR pointing at the first `blank' character after
1541 the preceding field. */
1542 if (tab
!= TAB_DEFAULT
)
1543 while (ptr
< lim
&& eword
--)
1545 while (ptr
< lim
&& *ptr
!= tab
)
1547 if (ptr
< lim
&& (eword
|| echar
))
1551 while (ptr
< lim
&& eword
--)
1553 while (ptr
< lim
&& blanks
[to_uchar (*ptr
)])
1555 while (ptr
< lim
&& !blanks
[to_uchar (*ptr
)])
1559 #ifdef POSIX_UNSPECIFIED
1560 /* The following block of code makes GNU sort incompatible with
1561 standard Unix sort, so it's ifdef'd out for now.
1562 The POSIX spec isn't clear on how to interpret this.
1563 FIXME: request clarification.
1565 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1566 Date: Thu, 30 May 96 12:20:41 -0400
1567 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1569 [...]I believe I've found another bug in `sort'.
1574 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1577 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1581 Unix sort produced the answer I expected: sort on the single character
1582 in column 7. GNU sort produced different results, because it disagrees
1583 on the interpretation of the key-end spec "M.N". Unix sort reads this
1584 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1585 "skip M-1 fields, then either N-1 characters or the rest of the current
1586 field, whichever comes first". This extra clause applies only to
1587 key-ends, not key-starts.
1590 /* Make LIM point to the end of (one byte past) the current field. */
1591 if (tab
!= TAB_DEFAULT
)
1594 newlim
= memchr (ptr
, tab
, lim
- ptr
);
1602 while (newlim
< lim
&& blanks
[to_uchar (*newlim
)])
1604 while (newlim
< lim
&& !blanks
[to_uchar (*newlim
)])
1610 if (echar
!= 0) /* We need to skip over a portion of the end field. */
1612 /* If we're ignoring leading blanks when computing the End
1613 of the field, skip past them here. */
1614 if (key
->skipeblanks
)
1615 while (ptr
< lim
&& blanks
[to_uchar (*ptr
)])
1618 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1619 ptr
= MIN (lim
, ptr
+ echar
);
1625 /* Fill BUF reading from FP, moving buf->left bytes from the end
1626 of buf->buf to the beginning first. If EOF is reached and the
1627 file wasn't terminated by a newline, supply one. Set up BUF's line
1628 table too. FILE is the name of the file corresponding to FP.
1629 Return true if some input was read. */
1632 fillbuf (struct buffer
*buf
, FILE *fp
, char const *file
)
1634 struct keyfield
const *key
= keylist
;
1636 size_t line_bytes
= buf
->line_bytes
;
1637 size_t mergesize
= merge_buffer_size
- MIN_MERGE_BUFFER_SIZE
;
1642 if (buf
->used
!= buf
->left
)
1644 memmove (buf
->buf
, buf
->buf
+ buf
->used
- buf
->left
, buf
->left
);
1645 buf
->used
= buf
->left
;
1651 char *ptr
= buf
->buf
+ buf
->used
;
1652 struct line
*linelim
= buffer_linelim (buf
);
1653 struct line
*line
= linelim
- buf
->nlines
;
1654 size_t avail
= (char *) linelim
- buf
->nlines
* line_bytes
- ptr
;
1655 char *line_start
= buf
->nlines
? line
->text
+ line
->length
: buf
->buf
;
1657 while (line_bytes
+ 1 < avail
)
1659 /* Read as many bytes as possible, but do not read so many
1660 bytes that there might not be enough room for the
1661 corresponding line array. The worst case is when the
1662 rest of the input file consists entirely of newlines,
1663 except that the last byte is not a newline. */
1664 size_t readsize
= (avail
- 1) / (line_bytes
+ 1);
1665 size_t bytes_read
= fread (ptr
, 1, readsize
, fp
);
1666 char *ptrlim
= ptr
+ bytes_read
;
1668 avail
-= bytes_read
;
1670 if (bytes_read
!= readsize
)
1673 die (_("read failed"), file
);
1677 if (buf
->buf
== ptrlim
)
1679 if (ptrlim
[-1] != eol
)
1684 /* Find and record each line in the just-read input. */
1685 while ((p
= memchr (ptr
, eol
, ptrlim
- ptr
)))
1689 line
->text
= line_start
;
1690 line
->length
= ptr
- line_start
;
1691 mergesize
= MAX (mergesize
, line
->length
);
1692 avail
-= line_bytes
;
1696 /* Precompute the position of the first key for
1698 line
->keylim
= (key
->eword
== SIZE_MAX
1700 : limfield (line
, key
));
1702 if (key
->sword
!= SIZE_MAX
)
1703 line
->keybeg
= begfield (line
, key
);
1706 if (key
->skipsblanks
)
1707 while (blanks
[to_uchar (*line_start
)])
1709 line
->keybeg
= line_start
;
1721 buf
->used
= ptr
- buf
->buf
;
1722 buf
->nlines
= buffer_linelim (buf
) - line
;
1723 if (buf
->nlines
!= 0)
1725 buf
->left
= ptr
- line_start
;
1726 merge_buffer_size
= mergesize
+ MIN_MERGE_BUFFER_SIZE
;
1731 /* The current input line is too long to fit in the buffer.
1732 Double the buffer size and try again, keeping it properly
1734 size_t line_alloc
= buf
->alloc
/ sizeof (struct line
);
1735 buf
->buf
= x2nrealloc (buf
->buf
, &line_alloc
, sizeof (struct line
));
1736 buf
->alloc
= line_alloc
* sizeof (struct line
);
1741 /* Exit with an error if a mixture of SI and IEC units detected. */
1744 check_mixed_SI_IEC (char prefix
, struct keyfield
*key
)
1746 int iec_present
= prefix
== 'i';
1749 if (key
->iec_present
!= -1 && iec_present
!= key
->iec_present
)
1750 error (SORT_FAILURE
, 0, _("both SI and IEC prefixes present on units"));
1751 key
->iec_present
= iec_present
;
1756 /* Return an integer which represents the order of magnitude of
1757 the unit following the number. NUMBER can contain thousands separators
1758 or a decimal point, but not have preceeding blanks.
1759 Negative numbers return a negative unit order. */
1762 find_unit_order (const char *number
, struct keyfield
*key
, char const **endptr
)
1764 static const char orders
[UCHAR_LIM
] =
1766 #if SOME_DAY_WE_WILL_REQUIRE_C99
1767 ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
1770 /* Generate the following table with this command:
1771 perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
1772 foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
1774 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1775 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1776 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3,
1777 0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
1778 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1779 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1780 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1781 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1782 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1783 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1784 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1788 const unsigned char *p
= number
;
1798 /* Scan to end of number.
1799 Decimals or separators not followed by digits stop the scan.
1800 Numbers ending in decimals or separators are thus considered
1801 to be lacking in units.
1802 FIXME: add support for multibyte thousands_sep and decimal_point. */
1804 while (ISDIGIT (*p
))
1808 if (*p
== decimal_point
&& ISDIGIT (*(p
+ 1)))
1810 else if (*p
== thousands_sep
&& ISDIGIT (*(p
+ 1)))
1814 int order
= orders
[*p
];
1816 /* For valid units check for MiB vs MB etc. */
1820 p
+= check_mixed_SI_IEC (*p
, key
);
1826 return sign
* order
;
1829 /* Compare numbers ending in units with SI xor IEC prefixes
1830 <none/unknown> < K/k < M < G < T < P < E < Z < Y
1831 Assume that numbers are properly abbreviated.
1832 i.e. input will never have both 6000K and 5M. */
1835 human_numcompare (const char *a
, const char *b
, struct keyfield
*key
,
1838 while (blanks
[to_uchar (*a
)])
1840 while (blanks
[to_uchar (*b
)])
1843 int order_a
= find_unit_order (a
, key
, ea
);
1844 int order_b
= find_unit_order (b
, key
, NULL
);
1846 return (order_a
> order_b
? 1
1847 : order_a
< order_b
? -1
1848 : strnumcmp (a
, b
, decimal_point
, thousands_sep
));
1851 /* Compare strings A and B as numbers without explicitly converting them to
1852 machine numbers. Comparatively slow for short strings, but asymptotically
1856 numcompare (const char *a
, const char *b
, char const **ea
)
1858 while (blanks
[to_uchar (*a
)])
1860 while (blanks
[to_uchar (*b
)])
1865 /* Approximate strnumcmp extents with find_unit_order. */
1866 if (find_unit_order (a
, NULL
, ea
))
1868 *ea
-= 1; /* ignore the order letter */
1869 *ea
-= (**ea
== 'i'); /* and IEC prefix */
1873 return strnumcmp (a
, b
, decimal_point
, thousands_sep
);
1877 general_numcompare (const char *sa
, const char *sb
, char const **ea
)
1879 /* FIXME: maybe add option to try expensive FP conversion
1880 only if A and B can't be compared more cheaply/accurately. */
1882 #if HAVE_C99_STRTOLD /* provided by c-strtold module. */
1883 # define long_double long double
1885 # define long_double double
1887 # define strtold strtod
1891 long_double a
= strtold (sa
, (char **) ea
);
1892 long_double b
= strtold (sb
, &eb
);
1894 /* Put conversion errors at the start of the collating sequence. */
1896 return sb
== eb
? 0 : -1;
1900 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1901 conversion errors but before numbers; sort them by internal
1902 bit-pattern, for lack of a more portable alternative. */
1908 : memcmp ((char *) &a
, (char *) &b
, sizeof a
));
1911 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1912 Return 0 if the name in S is not recognized. */
1915 getmonth (char const *month
, size_t len
, char const **ea
)
1918 size_t hi
= MONTHS_PER_YEAR
;
1919 char const *monthlim
= month
+ len
;
1923 if (month
== monthlim
)
1925 if (!blanks
[to_uchar (*month
)])
1932 size_t ix
= (lo
+ hi
) / 2;
1933 char const *m
= month
;
1934 char const *n
= monthtab
[ix
].name
;
1942 return monthtab
[ix
].val
;
1944 if (m
== monthlim
|| fold_toupper
[to_uchar (*m
)] < to_uchar (*n
))
1949 else if (fold_toupper
[to_uchar (*m
)] > to_uchar (*n
))
1961 /* A source of random data. */
1962 static struct randread_source
*randread_source
;
1964 /* Return the Ith randomly-generated state. The caller must invoke
1965 random_state (H) for all H less than I before invoking random_state
1968 static struct md5_ctx
1969 random_state (size_t i
)
1971 /* An array of states resulting from the random data, and counts of
1972 its used and allocated members. */
1973 static struct md5_ctx
*state
;
1975 static size_t allocated
;
1977 struct md5_ctx
*s
= &state
[i
];
1981 unsigned char buf
[MD5_DIGEST_SIZE
];
1987 state
= X2NREALLOC (state
, &allocated
);
1991 randread (randread_source
, buf
, sizeof buf
);
1993 md5_process_bytes (buf
, sizeof buf
, s
);
1999 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
2000 with length LENGTHB. Return negative if less, zero if equal,
2001 positive if greater. */
2004 cmp_hashes (char const *texta
, size_t lena
,
2005 char const *textb
, size_t lenb
)
2007 /* Try random hashes until a pair of hashes disagree. But if the
2008 first pair of random hashes agree, check whether the keys are
2009 identical and if so report no difference. */
2014 uint32_t dig
[2][MD5_DIGEST_SIZE
/ sizeof (uint32_t)];
2015 struct md5_ctx s
[2];
2016 s
[0] = s
[1] = random_state (i
);
2017 md5_process_bytes (texta
, lena
, &s
[0]); md5_finish_ctx (&s
[0], dig
[0]);
2018 md5_process_bytes (textb
, lenb
, &s
[1]); md5_finish_ctx (&s
[1], dig
[1]);
2019 diff
= memcmp (dig
[0], dig
[1], sizeof dig
[0]);
2022 if (i
== 0 && lena
== lenb
&& memcmp (texta
, textb
, lena
) == 0)
2029 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
2030 using one or more random hash functions. */
2033 compare_random (char *restrict texta
, size_t lena
,
2034 char *restrict textb
, size_t lenb
)
2038 if (! hard_LC_COLLATE
)
2039 diff
= cmp_hashes (texta
, lena
, textb
, lenb
);
2042 /* Transform the text into the basis of comparison, so that byte
2043 strings that would otherwise considered to be equal are
2044 considered equal here even if their bytes differ. */
2047 char stackbuf
[4000];
2048 size_t tlena
= xmemxfrm (stackbuf
, sizeof stackbuf
, texta
, lena
);
2049 bool a_fits
= tlena
<= sizeof stackbuf
;
2050 size_t tlenb
= xmemxfrm ((a_fits
? stackbuf
+ tlena
: NULL
),
2051 (a_fits
? sizeof stackbuf
- tlena
: 0),
2054 if (a_fits
&& tlena
+ tlenb
<= sizeof stackbuf
)
2058 /* Adding 1 to the buffer size lets xmemxfrm run a bit
2059 faster by avoiding the need for an extra buffer copy. */
2060 buf
= xmalloc (tlena
+ tlenb
+ 1);
2061 xmemxfrm (buf
, tlena
+ 1, texta
, lena
);
2062 xmemxfrm (buf
+ tlena
, tlenb
+ 1, textb
, lenb
);
2065 diff
= cmp_hashes (buf
, tlena
, buf
+ tlena
, tlenb
);
2067 if (buf
!= stackbuf
)
2074 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
2075 using filevercmp. See lib/filevercmp.h for function description. */
2078 compare_version (char *restrict texta
, size_t lena
,
2079 char *restrict textb
, size_t lenb
)
2083 /* It is necessary to save the character after the end of the field.
2084 "filevercmp" works with NUL terminated strings. Our blocks of
2085 text are not necessarily terminated with a NUL byte. */
2086 char sv_a
= texta
[lena
];
2087 char sv_b
= textb
[lenb
];
2092 diff
= filevercmp (texta
, textb
);
2100 /* For debug mode, count tabs in the passed string
2101 so we can adjust the widths returned by mbswidth.
2102 FIXME: Should we generally be counting non printable chars? */
2105 count_tabs (char const *text
, const size_t len
)
2108 size_t tlen
= strnlen (text
, len
);
2112 if (*text
++ == '\t')
2119 /* For debug mode, "underline" a key at the
2120 specified offset and screen width. */
2123 mark_key (size_t offset
, size_t width
)
2125 printf ("%*s", (int) offset
, "");
2128 printf (_("^ no match for key\n"));
2137 /* For debug mode, determine the screen offset and width
2138 to highlight for a key, and then output the highlight. */
2141 debug_key (char const *sline
, char const *sfield
, char const *efield
,
2142 size_t flen
, bool skipb
)
2144 char const *sa
= sfield
;
2146 if (skipb
) /* This key type implicitly skips leading blanks. */
2148 while (sa
< efield
&& blanks
[to_uchar (*sa
)])
2152 flen
--; /* This assumes TABs same width as SPACEs. */
2156 size_t offset
= mbsnwidth (sline
, sfield
- sline
, 0) + (sa
- sfield
);
2157 offset
+= count_tabs (sline
, sfield
- sline
);
2159 size_t width
= mbsnwidth (sa
, flen
, 0);
2160 width
+= count_tabs (sa
, flen
);
2162 mark_key (offset
, width
);
2165 /* Testing if a key is numeric is done in various places. */
2168 key_numeric (struct keyfield
const *key
)
2170 return key
->numeric
|| key
->general_numeric
|| key
->human_numeric
;
2173 /* Return whether sorting options specified for key. */
2176 default_key_compare (struct keyfield
const *key
)
2178 return ! (key
->ignore
2182 || key_numeric (key
)
2186 /* || key->reverse */
2190 /* Convert a key to the short options used to specify it. */
2193 key_to_opts (struct keyfield
const *key
, char *opts
)
2195 if (key
->skipsblanks
|| key
->skipeblanks
)
2196 *opts
++ = 'b';/* either disables global -b */
2197 if (key
->ignore
== nondictionary
)
2201 if (key
->general_numeric
)
2203 if (key
->human_numeric
)
2205 if (key
->ignore
== nonprinting
)
2220 /* Output data independent key warnings to stderr. */
2223 key_warnings (struct keyfield
const *gkey
, bool gkey_only
)
2225 struct keyfield
const *key
;
2226 struct keyfield ugkey
= *gkey
;
2227 unsigned long keynum
= 1;
2229 for (key
= keylist
; key
; key
= key
->next
, keynum
++)
2231 if (key
->obsolete_used
)
2233 /* obsolescent syntax +A.x -B.y is equivalent to:
2234 -k A+1.x+1,B.y (when y = 0)
2235 -k A+1.x+1,B+1.y (when y > 0) */
2236 char obuf
[INT_BUFSIZE_BOUND (size_t) * 2 + 4]; /* +# -# */
2237 char nbuf
[INT_BUFSIZE_BOUND (size_t) * 2 + 5]; /* -k #,# */
2242 size_t sword
= key
->sword
;
2243 size_t eword
= key
->eword
;
2244 if (sword
== SIZE_MAX
)
2247 po
+= sprintf (po
, "+%" PRIuMAX
, (uintmax_t) sword
);
2248 pn
+= sprintf (pn
, "-k %" PRIuMAX
, (uintmax_t) sword
+ 1);
2249 if (key
->eword
!= SIZE_MAX
)
2251 po
+= sprintf (po
, " -%" PRIuMAX
, (uintmax_t) eword
+ 1);
2252 pn
+= sprintf (pn
, ",%" PRIuMAX
,
2253 (uintmax_t) eword
+ 1 + (key
->echar
== SIZE_MAX
));
2255 error (0, 0, _("obsolescent key `%s' used; consider `%s' instead"),
2259 /* Warn about field specs that will never match. */
2260 if (key
->sword
!= SIZE_MAX
&& key
->eword
< key
->sword
)
2261 error (0, 0, _("key %lu has zero width and will be ignored"), keynum
);
2263 /* Warn about significant leading blanks. */
2264 bool implicit_skip
= key_numeric (key
) || key
->month
;
2265 bool maybe_space_aligned
= !hard_LC_COLLATE
&& default_key_compare (key
)
2266 && !(key
->schar
|| key
->echar
);
2267 bool line_offset
= key
->eword
== 0 && key
->echar
!= 0; /* -k1.x,1.y */
2268 if (!gkey_only
&& tab
== TAB_DEFAULT
&& !line_offset
2269 && ((!key
->skipsblanks
&& !(implicit_skip
|| maybe_space_aligned
))
2270 || (!key
->skipsblanks
&& key
->schar
)
2271 || (!key
->skipeblanks
&& key
->echar
)))
2272 error (0, 0, _("leading blanks are significant in key %lu; "
2273 "consider also specifying `b'"), keynum
);
2275 /* Warn about numeric comparisons spanning fields,
2276 as field delimiters could be interpreted as part
2277 of the number (maybe only in other locales). */
2278 if (!gkey_only
&& key_numeric (key
))
2280 size_t sword
= key
->sword
+ 1;
2281 size_t eword
= key
->eword
+ 1;
2285 error (0, 0, _("key %lu is numeric and spans multiple fields"),
2289 /* Flag global options not copied or specified in any key. */
2290 if (ugkey
.ignore
&& (ugkey
.ignore
== key
->ignore
))
2291 ugkey
.ignore
= NULL
;
2292 if (ugkey
.translate
&& (ugkey
.translate
== key
->translate
))
2293 ugkey
.translate
= NULL
;
2294 ugkey
.skipsblanks
&= !key
->skipsblanks
;
2295 ugkey
.skipeblanks
&= !key
->skipeblanks
;
2296 ugkey
.month
&= !key
->month
;
2297 ugkey
.numeric
&= !key
->numeric
;
2298 ugkey
.general_numeric
&= !key
->general_numeric
;
2299 ugkey
.human_numeric
&= !key
->human_numeric
;
2300 ugkey
.random
&= !key
->random
;
2301 ugkey
.version
&= !key
->version
;
2302 ugkey
.reverse
&= !key
->reverse
;
2305 /* Warn about ignored global options flagged above.
2306 Note if gkey is the only one in the list, all flags are cleared. */
2307 if (!default_key_compare (&ugkey
)
2308 || (ugkey
.reverse
&& (stable
|| unique
) && keylist
))
2310 bool ugkey_reverse
= ugkey
.reverse
;
2311 if (!(stable
|| unique
))
2312 ugkey
.reverse
= false;
2313 /* The following is too big, but guaranteed to be "big enough". */
2314 char opts
[sizeof short_options
];
2315 key_to_opts (&ugkey
, opts
);
2317 ngettext ("option `-%s' is ignored",
2318 "options `-%s' are ignored",
2319 select_plural (strlen (opts
))), opts
);
2320 ugkey
.reverse
= ugkey_reverse
;
2322 if (ugkey
.reverse
&& !(stable
|| unique
) && keylist
)
2323 error (0, 0, _("option `-r' only applies to last-resort comparison"));
2326 /* Compare two lines A and B trying every key in sequence until there
2327 are no more keys or a difference is found. */
2330 keycompare (const struct line
*a
, const struct line
*b
, bool show_debug
)
2332 struct keyfield
*key
= keylist
;
2334 /* For the first iteration only, the key positions have been
2335 precomputed for us. */
2336 char *texta
= a
->keybeg
;
2337 char *textb
= b
->keybeg
;
2338 char *lima
= a
->keylim
;
2339 char *limb
= b
->keylim
;
2345 char const *translate
= key
->translate
;
2346 bool const *ignore
= key
->ignore
;
2347 bool skipb
= false; /* Whether key type auto skips leading blanks. */
2349 /* Treat field ends before field starts as empty fields. */
2350 lima
= MAX (texta
, lima
);
2351 limb
= MAX (textb
, limb
);
2353 /* Find the lengths. */
2354 size_t lena
= lima
- texta
;
2355 size_t lenb
= limb
- textb
;
2357 /* Actually compare the fields. */
2360 diff
= compare_random (texta
, lena
, textb
, lenb
);
2361 else if (key_numeric (key
))
2363 char savea
= *lima
, saveb
= *limb
;
2364 char const* ea
= lima
;
2366 *lima
= *limb
= '\0';
2367 diff
= (key
->numeric
? numcompare (texta
, textb
, &ea
)
2368 : key
->general_numeric
? general_numcompare (texta
, textb
,
2370 : human_numcompare (texta
, textb
, key
, &ea
));
2376 *lima
= savea
, *limb
= saveb
;
2378 else if (key
->version
)
2379 diff
= compare_version (texta
, lena
, textb
, lenb
);
2380 else if (key
->month
)
2382 char const *ea
= lima
;
2384 int amon
= getmonth (texta
, lena
, &ea
);
2385 diff
= amon
- getmonth (textb
, lenb
, NULL
);
2389 lena
= amon
? ea
- texta
: 0;
2393 /* Sorting like this may become slow, so in a simple locale the user
2394 can select a faster sort that is similar to ascii sort. */
2395 else if (hard_LC_COLLATE
)
2397 /* FIXME: for debug, account for skipped chars, while handling mb chars.
2398 Generally perhaps xmemfrm could be used to determine chars that are
2399 excluded from the collating order? */
2400 if (ignore
|| translate
)
2403 size_t size
= lena
+ 1 + lenb
+ 1;
2404 char *copy_a
= (size
<= sizeof buf
? buf
: xmalloc (size
));
2405 char *copy_b
= copy_a
+ lena
+ 1;
2406 size_t new_len_a
, new_len_b
, i
;
2408 /* Ignore and/or translate chars before comparing. */
2409 for (new_len_a
= new_len_b
= i
= 0; i
< MAX (lena
, lenb
); i
++)
2413 copy_a
[new_len_a
] = (translate
2414 ? translate
[to_uchar (texta
[i
])]
2416 if (!ignore
|| !ignore
[to_uchar (texta
[i
])])
2421 copy_b
[new_len_b
] = (translate
2422 ? translate
[to_uchar (textb
[i
])]
2424 if (!ignore
|| !ignore
[to_uchar (textb
[i
])])
2429 diff
= xmemcoll (copy_a
, new_len_a
, copy_b
, new_len_b
);
2431 if (sizeof buf
< size
)
2435 diff
= - NONZERO (lenb
);
2439 diff
= xmemcoll (texta
, lena
, textb
, lenb
);
2443 char *savea
= texta
;
2445 #define CMP_WITH_IGNORE(A, B) \
2450 while (texta < lima && ignore[to_uchar (*texta)]) \
2452 while (textb < limb && ignore[to_uchar (*textb)]) \
2454 if (! (texta < lima && textb < limb)) \
2456 diff = to_uchar (A) - to_uchar (B); \
2463 diff = (texta < lima) - (textb < limb); \
2468 CMP_WITH_IGNORE (translate
[to_uchar (*texta
)],
2469 translate
[to_uchar (*textb
)]);
2471 CMP_WITH_IGNORE (*texta
, *textb
);
2473 /* We only need to restore this for debug_key
2474 in which case the keys being compared are equal. */
2478 diff
= - NONZERO (lenb
);
2485 char *savea
= texta
;
2487 while (texta
< lima
&& textb
< limb
)
2489 diff
= (to_uchar (translate
[to_uchar (*texta
++)])
2490 - to_uchar (translate
[to_uchar (*textb
++)]));
2495 /* We only need to restore this for debug_key
2496 in which case the keys being compared are equal. */
2501 diff
= memcmp (texta
, textb
, MIN (lena
, lenb
));
2505 diff
= lena
< lenb
? -1 : lena
!= lenb
;
2512 debug_key (a
->text
, texta
, lima
, lena
, skipb
);
2518 /* Find the beginning and limit of the next field. */
2519 if (key
->eword
!= SIZE_MAX
)
2520 lima
= limfield (a
, key
), limb
= limfield (b
, key
);
2522 lima
= a
->text
+ a
->length
- 1, limb
= b
->text
+ b
->length
- 1;
2524 if (key
->sword
!= SIZE_MAX
)
2525 texta
= begfield (a
, key
), textb
= begfield (b
, key
);
2528 texta
= a
->text
, textb
= b
->text
;
2529 if (key
->skipsblanks
)
2531 while (texta
< lima
&& blanks
[to_uchar (*texta
)])
2533 while (textb
< limb
&& blanks
[to_uchar (*textb
)])
2544 return key
->reverse
? -diff
: diff
;
2547 /* Compare two lines A and B, returning negative, zero, or positive
2548 depending on whether A compares less than, equal to, or greater than B. */
2551 compare (const struct line
*a
, const struct line
*b
, bool show_debug
)
2556 /* First try to compare on the specified keys (if any).
2557 The only two cases with no key at all are unadorned sort,
2558 and unadorned sort -r. */
2561 diff
= keycompare (a
, b
, show_debug
);
2562 if (diff
|| unique
|| stable
)
2566 /* If the keys all compare equal (or no keys were specified)
2567 fall through to the default comparison. */
2568 alen
= a
->length
- 1, blen
= b
->length
- 1;
2571 debug_key (a
->text
, a
->text
, a
->text
+ alen
, alen
, false);
2574 diff
= - NONZERO (blen
);
2577 else if (hard_LC_COLLATE
)
2578 diff
= xmemcoll (a
->text
, alen
, b
->text
, blen
);
2579 else if (! (diff
= memcmp (a
->text
, b
->text
, MIN (alen
, blen
))))
2580 diff
= alen
< blen
? -1 : alen
!= blen
;
2582 return reverse
? -diff
: diff
;
2586 write_bytes (struct line
const *line
, FILE *fp
, char const *output_file
)
2588 char const *buf
= line
->text
;
2589 size_t n_bytes
= line
->length
;
2591 /* Convert TABs to '>' and \0 to \n when -z specified. */
2592 if (debug
&& fp
== stdout
)
2594 char const *ebuf
= buf
+ n_bytes
;
2595 char const *c
= buf
;
2602 else if (wc
== 0 && eolchar
== 0)
2604 if (fputc (wc
, fp
) == EOF
)
2605 die (_("write failed"), output_file
);
2608 compare (line
, line
, true);
2612 if (fwrite (buf
, 1, n_bytes
, fp
) != n_bytes
)
2613 die (_("write failed"), output_file
);
2617 /* Check that the lines read from FILE_NAME come in order. Return
2618 true if they are in order. If CHECKONLY == 'c', also print a
2619 diagnostic (FILE_NAME, line number, contents of line) to stderr if
2620 they are not in order. */
2623 check (char const *file_name
, char checkonly
)
2625 FILE *fp
= xfopen (file_name
, "r");
2626 struct buffer buf
; /* Input buffer. */
2627 struct line temp
; /* Copy of previous line. */
2629 uintmax_t line_number
= 0;
2630 struct keyfield
const *key
= keylist
;
2631 bool nonunique
= ! unique
;
2632 bool ordered
= true;
2634 initbuf (&buf
, sizeof (struct line
),
2635 MAX (merge_buffer_size
, sort_size
));
2638 while (fillbuf (&buf
, fp
, file_name
))
2640 struct line
const *line
= buffer_linelim (&buf
);
2641 struct line
const *linebase
= line
- buf
.nlines
;
2643 /* Make sure the line saved from the old buffer contents is
2644 less than or equal to the first line of the new buffer. */
2645 if (alloc
&& nonunique
<= compare (&temp
, line
- 1, false))
2649 if (checkonly
== 'c')
2651 struct line
const *disorder_line
= line
- 1;
2652 uintmax_t disorder_line_number
=
2653 buffer_linelim (&buf
) - disorder_line
+ line_number
;
2654 char hr_buf
[INT_BUFSIZE_BOUND (uintmax_t)];
2655 fprintf (stderr
, _("%s: %s:%s: disorder: "),
2656 program_name
, file_name
,
2657 umaxtostr (disorder_line_number
, hr_buf
));
2659 fputc ('\n', stderr
);
2660 write_bytes (disorder_line
, debug
? stdout
: stderr
,
2661 debug
? _("standard out") : _("standard error"));
2669 /* Compare each line in the buffer with its successor. */
2670 while (linebase
< --line
)
2671 if (nonunique
<= compare (line
, line
- 1, false))
2672 goto found_disorder
;
2674 line_number
+= buf
.nlines
;
2676 /* Save the last line of the buffer. */
2677 if (alloc
< line
->length
)
2684 alloc
= line
->length
;
2688 while (alloc
< line
->length
);
2690 temp
.text
= xrealloc (temp
.text
, alloc
);
2692 memcpy (temp
.text
, line
->text
, line
->length
);
2693 temp
.length
= line
->length
;
2696 temp
.keybeg
= temp
.text
+ (line
->keybeg
- line
->text
);
2697 temp
.keylim
= temp
.text
+ (line
->keylim
- line
->text
);
2701 xfclose (fp
, file_name
);
2707 /* Open FILES (there are NFILES of them) and store the resulting array
2708 of stream pointers into (*PFPS). Allocate the array. Return the
2709 number of successfully opened files, setting errno if this value is
2710 less than NFILES. */
2713 open_input_files (struct sortfile
*files
, size_t nfiles
, FILE ***pfps
)
2715 FILE **fps
= *pfps
= xnmalloc (nfiles
, sizeof *fps
);
2718 /* Open as many input files as we can. */
2719 for (i
= 0; i
< nfiles
; i
++)
2721 fps
[i
] = (files
[i
].pid
2722 ? open_temp (files
[i
].name
, files
[i
].pid
)
2723 : stream_open (files
[i
].name
, "r"));
2731 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2732 files (all of which are at the start of the FILES array), and
2733 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2734 FPS is the vector of open stream corresponding to the files.
2735 Close input and output streams before returning.
2736 OUTPUT_FILE gives the name of the output file. If it is NULL,
2737 the output file is standard output. */
2740 mergefps (struct sortfile
*files
, size_t ntemps
, size_t nfiles
,
2741 FILE *ofp
, char const *output_file
, FILE **fps
)
2743 struct buffer
*buffer
= xnmalloc (nfiles
, sizeof *buffer
);
2744 /* Input buffers for each file. */
2745 struct line saved
; /* Saved line storage for unique check. */
2746 struct line
const *savedline
= NULL
;
2747 /* &saved if there is a saved line. */
2748 size_t savealloc
= 0; /* Size allocated for the saved line. */
2749 struct line
const **cur
= xnmalloc (nfiles
, sizeof *cur
);
2750 /* Current line in each line table. */
2751 struct line
const **base
= xnmalloc (nfiles
, sizeof *base
);
2752 /* Base of each line table. */
2753 size_t *ord
= xnmalloc (nfiles
, sizeof *ord
);
2754 /* Table representing a permutation of fps,
2755 such that cur[ord[0]] is the smallest line
2756 and will be next output. */
2760 struct keyfield
const *key
= keylist
;
2763 /* Read initial lines from each input file. */
2764 for (i
= 0; i
< nfiles
; )
2766 initbuf (&buffer
[i
], sizeof (struct line
),
2767 MAX (merge_buffer_size
, sort_size
/ nfiles
));
2768 if (fillbuf (&buffer
[i
], fps
[i
], files
[i
].name
))
2770 struct line
const *linelim
= buffer_linelim (&buffer
[i
]);
2771 cur
[i
] = linelim
- 1;
2772 base
[i
] = linelim
- buffer
[i
].nlines
;
2777 /* fps[i] is empty; eliminate it from future consideration. */
2778 xfclose (fps
[i
], files
[i
].name
);
2782 zaptemp (files
[i
].name
);
2784 free (buffer
[i
].buf
);
2786 for (j
= i
; j
< nfiles
; ++j
)
2788 files
[j
] = files
[j
+ 1];
2789 fps
[j
] = fps
[j
+ 1];
2794 /* Set up the ord table according to comparisons among input lines.
2795 Since this only reorders two items if one is strictly greater than
2796 the other, it is stable. */
2797 for (i
= 0; i
< nfiles
; ++i
)
2799 for (i
= 1; i
< nfiles
; ++i
)
2800 if (0 < compare (cur
[ord
[i
- 1]], cur
[ord
[i
]], false))
2801 t
= ord
[i
- 1], ord
[i
- 1] = ord
[i
], ord
[i
] = t
, i
= 0;
2803 /* Repeatedly output the smallest line until no input remains. */
2806 struct line
const *smallest
= cur
[ord
[0]];
2808 /* If uniquified output is turned on, output only the first of
2809 an identical series of lines. */
2812 if (savedline
&& compare (savedline
, smallest
, false))
2815 write_bytes (&saved
, ofp
, output_file
);
2820 if (savealloc
< smallest
->length
)
2825 savealloc
= smallest
->length
;
2828 while ((savealloc
*= 2) < smallest
->length
);
2830 saved
.text
= xrealloc (saved
.text
, savealloc
);
2832 saved
.length
= smallest
->length
;
2833 memcpy (saved
.text
, smallest
->text
, saved
.length
);
2837 saved
.text
+ (smallest
->keybeg
- smallest
->text
);
2839 saved
.text
+ (smallest
->keylim
- smallest
->text
);
2844 write_bytes (smallest
, ofp
, output_file
);
2846 /* Check if we need to read more lines into core. */
2847 if (base
[ord
[0]] < smallest
)
2848 cur
[ord
[0]] = smallest
- 1;
2851 if (fillbuf (&buffer
[ord
[0]], fps
[ord
[0]], files
[ord
[0]].name
))
2853 struct line
const *linelim
= buffer_linelim (&buffer
[ord
[0]]);
2854 cur
[ord
[0]] = linelim
- 1;
2855 base
[ord
[0]] = linelim
- buffer
[ord
[0]].nlines
;
2859 /* We reached EOF on fps[ord[0]]. */
2860 for (i
= 1; i
< nfiles
; ++i
)
2861 if (ord
[i
] > ord
[0])
2864 xfclose (fps
[ord
[0]], files
[ord
[0]].name
);
2865 if (ord
[0] < ntemps
)
2868 zaptemp (files
[ord
[0]].name
);
2870 free (buffer
[ord
[0]].buf
);
2871 for (i
= ord
[0]; i
< nfiles
; ++i
)
2873 fps
[i
] = fps
[i
+ 1];
2874 files
[i
] = files
[i
+ 1];
2875 buffer
[i
] = buffer
[i
+ 1];
2876 cur
[i
] = cur
[i
+ 1];
2877 base
[i
] = base
[i
+ 1];
2879 for (i
= 0; i
< nfiles
; ++i
)
2880 ord
[i
] = ord
[i
+ 1];
2885 /* The new line just read in may be larger than other lines
2886 already in main memory; push it back in the queue until we
2887 encounter a line larger than it. Optimize for the common
2888 case where the new line is smallest. */
2893 size_t ord0
= ord
[0];
2894 size_t count_of_smaller_lines
;
2898 int cmp
= compare (cur
[ord0
], cur
[ord
[probe
]], false);
2899 if (cmp
< 0 || (cmp
== 0 && ord0
< ord
[probe
]))
2903 probe
= (lo
+ hi
) / 2;
2906 count_of_smaller_lines
= lo
- 1;
2907 for (j
= 0; j
< count_of_smaller_lines
; j
++)
2908 ord
[j
] = ord
[j
+ 1];
2909 ord
[count_of_smaller_lines
] = ord0
;
2912 /* Free up some resources every once in a while. */
2913 if (MAX_PROCS_BEFORE_REAP
< nprocs
)
2917 if (unique
&& savedline
)
2919 write_bytes (&saved
, ofp
, output_file
);
2923 xfclose (ofp
, output_file
);
2931 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2932 files (all of which are at the start of the FILES array), and
2933 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2934 Close input and output files before returning.
2935 OUTPUT_FILE gives the name of the output file.
2937 Return the number of files successfully merged. This number can be
2938 less than NFILES if we ran low on file descriptors, but in this
2939 case it is never less than 2. */
2942 mergefiles (struct sortfile
*files
, size_t ntemps
, size_t nfiles
,
2943 FILE *ofp
, char const *output_file
)
2946 size_t nopened
= open_input_files (files
, nfiles
, &fps
);
2947 if (nopened
< nfiles
&& nopened
< 2)
2948 die (_("open failed"), files
[nopened
].name
);
2949 mergefps (files
, ntemps
, nopened
, ofp
, output_file
, fps
);
2953 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2954 and HI (with NHI members). T, LO, and HI point just past their
2955 respective arrays, and the arrays are in reverse order. NLO and
2956 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2959 mergelines (struct line
*t
,
2960 struct line
const *lo
, size_t nlo
,
2961 struct line
const *hi
, size_t nhi
)
2964 if (compare (lo
- 1, hi
- 1, false) <= 0)
2969 /* HI - NHI equalled T - (NLO + NHI) when this function
2970 began. Therefore HI must equal T now, and there is no
2971 need to copy from HI to T. */
2989 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2990 NLINES must be at least 2.
2991 The input and output arrays are in reverse order, and LINES and
2992 TEMP point just past the end of their respective arrays.
2994 Use a recursive divide-and-conquer algorithm, in the style
2995 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2996 the optimization suggested by exercise 5.2.4-10; this requires room
2997 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2998 writes that this memory optimization was originally published by
2999 D. A. Bell, Comp J. 1 (1958), 75. */
3002 sortlines (struct line
*lines
, size_t nlines
, struct line
*temp
)
3006 if (0 < compare (&lines
[-1], &lines
[-2], false))
3008 struct line tmp
= lines
[-1];
3009 lines
[-1] = lines
[-2];
3015 size_t nlo
= nlines
/ 2;
3016 size_t nhi
= nlines
- nlo
;
3017 struct line
*lo
= lines
;
3018 struct line
*hi
= lines
- nlo
;
3019 struct line
*sorted_lo
= temp
;
3021 sortlines (hi
, nhi
, temp
);
3023 sortlines_temp (lo
, nlo
, sorted_lo
);
3025 sorted_lo
[-1] = lo
[-1];
3027 mergelines (lines
, sorted_lo
, nlo
, hi
, nhi
);
3031 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
3032 rather than sorting in place. */
3035 sortlines_temp (struct line
*lines
, size_t nlines
, struct line
*temp
)
3039 /* Declare `swap' as int, not bool, to work around a bug
3040 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
3041 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
3042 int swap
= (0 < compare (&lines
[-1], &lines
[-2], false));
3043 temp
[-1] = lines
[-1 - swap
];
3044 temp
[-2] = lines
[-2 + swap
];
3048 size_t nlo
= nlines
/ 2;
3049 size_t nhi
= nlines
- nlo
;
3050 struct line
*lo
= lines
;
3051 struct line
*hi
= lines
- nlo
;
3052 struct line
*sorted_hi
= temp
- nlo
;
3054 sortlines_temp (hi
, nhi
, sorted_hi
);
3056 sortlines (lo
, nlo
, temp
);
3058 mergelines (temp
, lo
, nlo
, sorted_hi
, nhi
);
3062 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
3063 the same as OUTFILE. If found, merge the found instances (and perhaps
3064 some other files) into a temporary file so that it can in turn be
3065 merged into OUTFILE without destroying OUTFILE before it is completely
3066 read. Return the new value of NFILES, which differs from the old if
3067 some merging occurred.
3069 This test ensures that an otherwise-erroneous use like
3070 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
3071 It's not clear that POSIX requires this nicety.
3072 Detect common error cases, but don't try to catch obscure cases like
3073 "cat ... FILE ... | sort -m -o FILE"
3074 where traditional "sort" doesn't copy the input and where
3075 people should know that they're getting into trouble anyway.
3076 Catching these obscure cases would slow down performance in
3080 avoid_trashing_input (struct sortfile
*files
, size_t ntemps
,
3081 size_t nfiles
, char const *outfile
)
3084 bool got_outstat
= false;
3085 struct stat outstat
;
3087 for (i
= ntemps
; i
< nfiles
; i
++)
3089 bool is_stdin
= STREQ (files
[i
].name
, "-");
3093 if (outfile
&& STREQ (outfile
, files
[i
].name
) && !is_stdin
)
3100 ? stat (outfile
, &outstat
)
3101 : fstat (STDOUT_FILENO
, &outstat
))
3108 ? fstat (STDIN_FILENO
, &instat
)
3109 : stat (files
[i
].name
, &instat
))
3111 && SAME_INODE (instat
, outstat
));
3118 char *temp
= create_temp (&tftp
, &pid
);
3119 size_t num_merged
= 0;
3122 num_merged
+= mergefiles (&files
[i
], 0, nfiles
- i
, tftp
, temp
);
3123 files
[i
].name
= temp
;
3126 if (i
+ num_merged
< nfiles
)
3127 memmove (&files
[i
+ 1], &files
[i
+ num_merged
],
3128 num_merged
* sizeof *files
);
3130 nfiles
-= num_merged
- 1;;
3140 /* Merge the input FILES. NTEMPS is the number of files at the
3141 start of FILES that are temporary; it is zero at the top level.
3142 NFILES is the total number of files. Put the output in
3143 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
3146 merge (struct sortfile
*files
, size_t ntemps
, size_t nfiles
,
3147 char const *output_file
)
3149 while (nmerge
< nfiles
)
3151 /* Number of input files processed so far. */
3154 /* Number of output files generated so far. */
3157 /* nfiles % NMERGE; this counts input files that are left over
3158 after all full-sized merges have been done. */
3161 /* Number of easily-available slots at the next loop iteration. */
3164 /* Do as many NMERGE-size merges as possible. In the case that
3165 nmerge is bogus, increment by the maximum number of file
3166 descriptors allowed. */
3167 for (out
= in
= 0; nmerge
<= nfiles
- in
; out
++)
3171 char *temp
= create_temp (&tfp
, &pid
);
3172 size_t num_merged
= mergefiles (&files
[in
], MIN (ntemps
, nmerge
),
3174 ntemps
-= MIN (ntemps
, num_merged
);
3175 files
[out
].name
= temp
;
3176 files
[out
].pid
= pid
;
3180 remainder
= nfiles
- in
;
3181 cheap_slots
= nmerge
- out
% nmerge
;
3183 if (cheap_slots
< remainder
)
3185 /* So many files remain that they can't all be put into the last
3186 NMERGE-sized output window. Do one more merge. Merge as few
3187 files as possible, to avoid needless I/O. */
3188 size_t nshortmerge
= remainder
- cheap_slots
+ 1;
3191 char *temp
= create_temp (&tfp
, &pid
);
3192 size_t num_merged
= mergefiles (&files
[in
], MIN (ntemps
, nshortmerge
),
3193 nshortmerge
, tfp
, temp
);
3194 ntemps
-= MIN (ntemps
, num_merged
);
3195 files
[out
].name
= temp
;
3196 files
[out
++].pid
= pid
;
3200 /* Put the remaining input files into the last NMERGE-sized output
3201 window, so they will be merged in the next pass. */
3202 memmove (&files
[out
], &files
[in
], (nfiles
- in
) * sizeof *files
);
3207 nfiles
= avoid_trashing_input (files
, ntemps
, nfiles
, output_file
);
3209 /* We aren't guaranteed that this final mergefiles will work, therefore we
3210 try to merge into the output, and then merge as much as we can into a
3211 temp file if we can't. Repeat. */
3215 /* Merge directly into the output file if possible. */
3217 size_t nopened
= open_input_files (files
, nfiles
, &fps
);
3219 if (nopened
== nfiles
)
3221 FILE *ofp
= stream_open (output_file
, "w");
3224 mergefps (files
, ntemps
, nfiles
, ofp
, output_file
, fps
);
3227 if (errno
!= EMFILE
|| nopened
<= 2)
3228 die (_("open failed"), output_file
);
3230 else if (nopened
<= 2)
3231 die (_("open failed"), files
[nopened
].name
);
3233 /* We ran out of file descriptors. Close one of the input
3234 files, to gain a file descriptor. Then create a temporary
3235 file with our spare file descriptor. Retry if that failed
3236 (e.g., some other process could open a file between the time
3237 we closed and tried to create). */
3244 xfclose (fps
[nopened
], files
[nopened
].name
);
3245 temp
= maybe_create_temp (&tfp
, &pid
, ! (nopened
<= 2));
3249 /* Merge into the newly allocated temporary. */
3250 mergefps (&files
[0], MIN (ntemps
, nopened
), nopened
, tfp
, temp
, fps
);
3251 ntemps
-= MIN (ntemps
, nopened
);
3252 files
[0].name
= temp
;
3255 memmove (&files
[1], &files
[nopened
], (nfiles
- nopened
) * sizeof *files
);
3257 nfiles
-= nopened
- 1;
3261 /* Sort NFILES FILES onto OUTPUT_FILE. */
3264 sort (char * const *files
, size_t nfiles
, char const *output_file
)
3268 bool output_file_created
= false;
3274 char const *temp_output
;
3275 char const *file
= *files
;
3276 FILE *fp
= xfopen (file
, "r");
3278 size_t bytes_per_line
= (2 * sizeof (struct line
)
3279 - sizeof (struct line
) / 2);
3282 initbuf (&buf
, bytes_per_line
,
3283 sort_buffer_size (&fp
, 1, files
, nfiles
, bytes_per_line
));
3288 while (fillbuf (&buf
, fp
, file
))
3291 struct line
*linebase
;
3293 if (buf
.eof
&& nfiles
3294 && (bytes_per_line
+ 1
3295 < (buf
.alloc
- buf
.used
- bytes_per_line
* buf
.nlines
)))
3297 /* End of file, but there is more input and buffer room.
3298 Concatenate the next input file; this is faster in
3300 buf
.left
= buf
.used
;
3304 line
= buffer_linelim (&buf
);
3305 linebase
= line
- buf
.nlines
;
3307 sortlines (line
, buf
.nlines
, linebase
);
3308 if (buf
.eof
&& !nfiles
&& !ntemps
&& !buf
.left
)
3311 tfp
= xfopen (output_file
, "w");
3312 temp_output
= output_file
;
3313 output_file_created
= true;
3318 temp_output
= create_temp (&tfp
, NULL
);
3324 write_bytes (line
, tfp
, temp_output
);
3326 while (linebase
< line
&& compare (line
, line
- 1, false) == 0)
3329 while (linebase
< line
);
3331 xfclose (tfp
, temp_output
);
3333 /* Free up some resources every once in a while. */
3334 if (MAX_PROCS_BEFORE_REAP
< nprocs
)
3337 if (output_file_created
)
3346 if (! output_file_created
)
3349 struct tempnode
*node
= temphead
;
3350 struct sortfile
*tempfiles
= xnmalloc (ntemps
, sizeof *tempfiles
);
3351 for (i
= 0; node
; i
++)
3353 tempfiles
[i
].name
= node
->name
;
3354 tempfiles
[i
].pid
= node
->pid
;
3357 merge (tempfiles
, ntemps
, ntemps
, output_file
);
3362 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
3365 insertkey (struct keyfield
*key_arg
)
3367 struct keyfield
**p
;
3368 struct keyfield
*key
= xmemdup (key_arg
, sizeof *key
);
3370 for (p
= &keylist
; *p
; p
= &(*p
)->next
)
3376 /* Report a bad field specification SPEC, with extra info MSGID. */
3378 static void badfieldspec (char const *, char const *)
3381 badfieldspec (char const *spec
, char const *msgid
)
3383 error (SORT_FAILURE
, 0, _("%s: invalid field specification %s"),
3384 _(msgid
), quote (spec
));
3388 /* Report incompatible options. */
3390 static void incompatible_options (char const *) ATTRIBUTE_NORETURN
;
3392 incompatible_options (char const *opts
)
3394 error (SORT_FAILURE
, 0, _("options `-%s' are incompatible"), opts
);
3398 /* Check compatibility of ordering options. */
3401 check_ordering_compatibility (void)
3403 struct keyfield
*key
;
3405 for (key
= keylist
; key
; key
= key
->next
)
3406 if ((1 < (key
->random
+ key
->numeric
+ key
->general_numeric
+ key
->month
3407 + key
->version
+ !!key
->ignore
+ key
->human_numeric
))
3408 || (key
->random
&& key
->translate
))
3410 /* The following is too big, but guaranteed to be "big enough". */
3411 char opts
[sizeof short_options
];
3412 /* Clear flags we're not interested in. */
3413 key
->skipsblanks
= key
->skipeblanks
= key
->reverse
= false;
3414 key_to_opts (key
, opts
);
3415 incompatible_options (opts
);
3419 /* Parse the leading integer in STRING and store the resulting value
3420 (which must fit into size_t) into *VAL. Return the address of the
3421 suffix after the integer. If the value is too large, silently
3422 substitute SIZE_MAX. If MSGID is NULL, return NULL after
3423 failure; otherwise, report MSGID and exit on failure. */
3426 parse_field_count (char const *string
, size_t *val
, char const *msgid
)
3431 switch (xstrtoumax (string
, &suffix
, 10, &n
, ""))
3434 case LONGINT_INVALID_SUFFIX_CHAR
:
3439 case LONGINT_OVERFLOW
:
3440 case LONGINT_OVERFLOW
| LONGINT_INVALID_SUFFIX_CHAR
:
3444 case LONGINT_INVALID
:
3446 error (SORT_FAILURE
, 0, _("%s: invalid count at start of %s"),
3447 _(msgid
), quote (string
));
3454 /* Handle interrupts and hangups. */
3457 sighandler (int sig
)
3460 signal (sig
, SIG_IGN
);
3464 signal (sig
, SIG_DFL
);
3468 /* Set the ordering options for KEY specified in S.
3469 Return the address of the first character in S that
3470 is not a valid ordering option.
3471 BLANKTYPE is the kind of blanks that 'b' should skip. */
3474 set_ordering (const char *s
, struct keyfield
*key
, enum blanktype blanktype
)
3481 if (blanktype
== bl_start
|| blanktype
== bl_both
)
3482 key
->skipsblanks
= true;
3483 if (blanktype
== bl_end
|| blanktype
== bl_both
)
3484 key
->skipeblanks
= true;
3487 key
->ignore
= nondictionary
;
3490 key
->translate
= fold_toupper
;
3493 key
->general_numeric
= true;
3496 key
->human_numeric
= true;
3499 /* Option order should not matter, so don't let -i override
3500 -d. -d implies -i, but -i does not imply -d. */
3502 key
->ignore
= nonprinting
;
3508 key
->numeric
= true;
3514 key
->reverse
= true;
3517 key
->version
= true;
3527 static struct keyfield
*
3528 key_init (struct keyfield
*key
)
3530 memset (key
, 0, sizeof *key
);
3531 key
->eword
= SIZE_MAX
;
3532 key
->iec_present
= -1;
3537 main (int argc
, char **argv
)
3539 struct keyfield
*key
;
3540 struct keyfield key_buf
;
3541 struct keyfield gkey
;
3542 bool gkey_only
= false;
3546 bool mergeonly
= false;
3547 char *random_source
= NULL
;
3548 bool need_random
= false;
3550 bool posixly_correct
= (getenv ("POSIXLY_CORRECT") != NULL
);
3551 bool obsolete_usage
= (posix2_version () < 200112);
3553 char *files_from
= NULL
;
3555 char const *outfile
= NULL
;
3557 initialize_main (&argc
, &argv
);
3558 set_program_name (argv
[0]);
3559 setlocale (LC_ALL
, "");
3560 bindtextdomain (PACKAGE
, LOCALEDIR
);
3561 textdomain (PACKAGE
);
3563 initialize_exit_failure (SORT_FAILURE
);
3565 hard_LC_COLLATE
= hard_locale (LC_COLLATE
);
3566 #if HAVE_NL_LANGINFO
3567 hard_LC_TIME
= hard_locale (LC_TIME
);
3570 /* Get locale's representation of the decimal point. */
3572 struct lconv
const *locale
= localeconv ();
3574 /* If the locale doesn't define a decimal point, or if the decimal
3575 point is multibyte, use the C locale's decimal point. FIXME:
3576 add support for multibyte decimal points. */
3577 decimal_point
= to_uchar (locale
->decimal_point
[0]);
3578 if (! decimal_point
|| locale
->decimal_point
[1])
3579 decimal_point
= '.';
3581 /* FIXME: add support for multibyte thousands separators. */
3582 thousands_sep
= to_uchar (*locale
->thousands_sep
);
3583 if (! thousands_sep
|| locale
->thousands_sep
[1])
3587 have_read_stdin
= false;
3592 static int const sig
[] =
3594 /* The usual suspects. */
3595 SIGALRM
, SIGHUP
, SIGINT
, SIGPIPE
, SIGQUIT
, SIGTERM
,
3612 enum { nsigs
= ARRAY_CARDINALITY (sig
) };
3615 struct sigaction act
;
3617 sigemptyset (&caught_signals
);
3618 for (i
= 0; i
< nsigs
; i
++)
3620 sigaction (sig
[i
], NULL
, &act
);
3621 if (act
.sa_handler
!= SIG_IGN
)
3622 sigaddset (&caught_signals
, sig
[i
]);
3625 act
.sa_handler
= sighandler
;
3626 act
.sa_mask
= caught_signals
;
3629 for (i
= 0; i
< nsigs
; i
++)
3630 if (sigismember (&caught_signals
, sig
[i
]))
3631 sigaction (sig
[i
], &act
, NULL
);
3633 for (i
= 0; i
< nsigs
; i
++)
3634 if (signal (sig
[i
], SIG_IGN
) != SIG_IGN
)
3636 signal (sig
[i
], sighandler
);
3637 siginterrupt (sig
[i
], 1);
3641 signal (SIGCHLD
, SIG_DFL
); /* Don't inherit CHLD handling from parent. */
3643 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
3644 atexit (exit_cleanup
);
3647 gkey
.sword
= SIZE_MAX
;
3649 files
= xnmalloc (argc
, sizeof *files
);
3653 /* Parse an operand as a file after "--" was seen; or if
3654 pedantic and a file was seen, unless the POSIX version
3655 predates 1003.1-2001 and -c was not seen and the operand is
3656 "-o FILE" or "-oFILE". */
3660 || (posixly_correct
&& nfiles
!= 0
3661 && ! (obsolete_usage
3664 && argv
[optind
][0] == '-' && argv
[optind
][1] == 'o'
3665 && (argv
[optind
][2] || optind
+ 1 != argc
)))
3666 || ((c
= getopt_long (argc
, argv
, short_options
,
3672 files
[nfiles
++] = argv
[optind
++];
3678 if (optarg
[0] == '+')
3680 bool minus_pos_usage
= (optind
!= argc
&& argv
[optind
][0] == '-'
3681 && ISDIGIT (argv
[optind
][1]));
3682 obsolete_usage
|= minus_pos_usage
&& !posixly_correct
;
3685 /* Treat +POS1 [-POS2] as a key if possible; but silently
3686 treat an operand as a file if it is not a valid +POS1. */
3687 key
= key_init (&key_buf
);
3688 s
= parse_field_count (optarg
+ 1, &key
->sword
, NULL
);
3690 s
= parse_field_count (s
+ 1, &key
->schar
, NULL
);
3691 if (! (key
->sword
|| key
->schar
))
3692 key
->sword
= SIZE_MAX
;
3693 if (! s
|| *set_ordering (s
, key
, bl_start
))
3697 if (minus_pos_usage
)
3699 char const *optarg1
= argv
[optind
++];
3700 s
= parse_field_count (optarg1
+ 1, &key
->eword
,
3701 N_("invalid number after `-'"));
3703 s
= parse_field_count (s
+ 1, &key
->echar
,
3704 N_("invalid number after `.'"));
3705 if (!key
->echar
&& key
->eword
)
3707 /* obsolescent syntax +A.x -B.y is equivalent to:
3708 -k A+1.x+1,B.y (when y = 0)
3709 -k A+1.x+1,B+1.y (when y > 0)
3710 So eword is decremented as in the -k case
3711 only when the end field (B) is specified and
3715 if (*set_ordering (s
, key
, bl_end
))
3716 badfieldspec (optarg1
,
3717 N_("stray character in field spec"));
3719 key
->obsolete_used
= true;
3725 files
[nfiles
++] = optarg
;
3729 c
= XARGMATCH ("--sort", optarg
, sort_args
, sort_types
);
3746 set_ordering (str
, &gkey
, bl_both
);
3752 ? XARGMATCH ("--check", optarg
, check_args
, check_types
)
3757 if (checkonly
&& checkonly
!= c
)
3758 incompatible_options ("cC");
3762 case COMPRESS_PROGRAM_OPTION
:
3763 if (compress_program
&& !STREQ (compress_program
, optarg
))
3764 error (SORT_FAILURE
, 0, _("multiple compress programs specified"));
3765 compress_program
= optarg
;
3768 case DEBUG_PROGRAM_OPTION
:
3772 case FILES0_FROM_OPTION
:
3773 files_from
= optarg
;
3777 key
= key_init (&key_buf
);
3780 s
= parse_field_count (optarg
, &key
->sword
,
3781 N_("invalid number at field start"));
3784 /* Provoke with `sort -k0' */
3785 badfieldspec (optarg
, N_("field number is zero"));
3789 s
= parse_field_count (s
+ 1, &key
->schar
,
3790 N_("invalid number after `.'"));
3793 /* Provoke with `sort -k1.0' */
3794 badfieldspec (optarg
, N_("character offset is zero"));
3797 if (! (key
->sword
|| key
->schar
))
3798 key
->sword
= SIZE_MAX
;
3799 s
= set_ordering (s
, key
, bl_start
);
3802 key
->eword
= SIZE_MAX
;
3808 s
= parse_field_count (s
+ 1, &key
->eword
,
3809 N_("invalid number after `,'"));
3812 /* Provoke with `sort -k1,0' */
3813 badfieldspec (optarg
, N_("field number is zero"));
3817 s
= parse_field_count (s
+ 1, &key
->echar
,
3818 N_("invalid number after `.'"));
3820 s
= set_ordering (s
, key
, bl_end
);
3823 badfieldspec (optarg
, N_("stray character in field spec"));
3832 specify_nmerge (oi
, c
, optarg
);
3836 if (outfile
&& !STREQ (outfile
, optarg
))
3837 error (SORT_FAILURE
, 0, _("multiple output files specified"));
3841 case RANDOM_SOURCE_OPTION
:
3842 if (random_source
&& !STREQ (random_source
, optarg
))
3843 error (SORT_FAILURE
, 0, _("multiple random sources specified"));
3844 random_source
= optarg
;
3852 specify_sort_size (oi
, c
, optarg
);
3857 char newtab
= optarg
[0];
3859 error (SORT_FAILURE
, 0, _("empty tab"));
3862 if (STREQ (optarg
, "\\0"))
3866 /* Provoke with `sort -txx'. Complain about
3867 "multi-character tab" instead of "multibyte tab", so
3868 that the diagnostic's wording does not need to be
3869 changed once multibyte characters are supported. */
3870 error (SORT_FAILURE
, 0, _("multi-character tab %s"),
3874 if (tab
!= TAB_DEFAULT
&& tab
!= newtab
)
3875 error (SORT_FAILURE
, 0, _("incompatible tabs"));
3881 add_temp_dir (optarg
);
3889 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3890 through Solaris 7. It is also accepted by many non-Solaris
3891 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3892 -y is marked as obsolete starting with Solaris 8 (1999), but is
3893 still accepted as of Solaris 10 prerelease (2004).
3895 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3896 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3897 and which in general ignores the argument after "-y" if it
3898 consists entirely of digits (it can even be empty). */
3899 if (optarg
== argv
[optind
- 1])
3902 for (p
= optarg
; ISDIGIT (*p
); p
++)
3904 optind
-= (*p
!= '\0');
3912 case_GETOPT_HELP_CHAR
;
3914 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
3917 usage (SORT_FAILURE
);
3925 /* When using --files0-from=F, you may not specify any files
3926 on the command-line. */
3929 error (0, 0, _("extra operand %s"), quote (files
[0]));
3930 fprintf (stderr
, "%s\n",
3931 _("file operands cannot be combined with --files0-from"));
3932 usage (SORT_FAILURE
);
3935 if (STREQ (files_from
, "-"))
3939 stream
= fopen (files_from
, "r");
3941 error (SORT_FAILURE
, errno
, _("cannot open %s for reading"),
3942 quote (files_from
));
3945 readtokens0_init (&tok
);
3947 if (! readtokens0 (stream
, &tok
) || fclose (stream
) != 0)
3948 error (SORT_FAILURE
, 0, _("cannot read file names from %s"),
3949 quote (files_from
));
3957 for (i
= 0; i
< nfiles
; i
++)
3959 if (STREQ (files
[i
], "-"))
3960 error (SORT_FAILURE
, 0, _("when reading file names from stdin, "
3961 "no file name of %s allowed"),
3963 else if (files
[i
][0] == '\0')
3965 /* Using the standard `filename:line-number:' prefix here is
3966 not totally appropriate, since NUL is the separator, not NL,
3967 but it might be better than nothing. */
3968 unsigned long int file_number
= i
+ 1;
3969 error (SORT_FAILURE
, 0,
3970 _("%s:%lu: invalid zero-length file name"),
3971 quotearg_colon (files_from
), file_number
);
3976 error (SORT_FAILURE
, 0, _("no input from %s"),
3977 quote (files_from
));
3980 /* Inheritance of global options to individual keys. */
3981 for (key
= keylist
; key
; key
= key
->next
)
3983 if (default_key_compare (key
) && !key
->reverse
)
3985 key
->ignore
= gkey
.ignore
;
3986 key
->translate
= gkey
.translate
;
3987 key
->skipsblanks
= gkey
.skipsblanks
;
3988 key
->skipeblanks
= gkey
.skipeblanks
;
3989 key
->month
= gkey
.month
;
3990 key
->numeric
= gkey
.numeric
;
3991 key
->general_numeric
= gkey
.general_numeric
;
3992 key
->human_numeric
= gkey
.human_numeric
;
3993 key
->version
= gkey
.version
;
3994 key
->random
= gkey
.random
;
3995 key
->reverse
= gkey
.reverse
;
3998 need_random
|= key
->random
;
4001 if (!keylist
&& !default_key_compare (&gkey
))
4005 need_random
|= gkey
.random
;
4008 check_ordering_compatibility ();
4010 /* Disable this combination so that users are less likely
4011 to inadvertantly update a file with debugging enabled.
4012 Also it simplifies the code for handling temp files. */
4013 if (debug
&& outfile
)
4014 error (SORT_FAILURE
, 0, _("options -o and --debug are incompatible"));
4018 /* Always output the locale in debug mode, since this
4019 is such a common source of confusion. */
4020 if (hard_LC_COLLATE
)
4021 error (0, 0, _("using %s sorting rules"),
4022 quote (setlocale (LC_COLLATE
, NULL
)));
4024 error (0, 0, _("using simple byte comparison"));
4025 key_warnings (&gkey
, gkey_only
);
4028 reverse
= gkey
.reverse
;
4032 randread_source
= randread_new (random_source
, MD5_DIGEST_SIZE
);
4033 if (! randread_source
)
4034 die (_("open failed"), random_source
);
4037 if (temp_dir_count
== 0)
4039 char const *tmp_dir
= getenv ("TMPDIR");
4040 add_temp_dir (tmp_dir
? tmp_dir
: DEFAULT_TMPDIR
);
4045 static char *minus
= (char *) "-";
4051 /* Need to re-check that we meet the minimum requirement for memory
4052 usage with the final value for NMERGE. */
4054 sort_size
= MAX (sort_size
, MIN_SORT_SIZE
);
4059 error (SORT_FAILURE
, 0, _("extra operand %s not allowed with -%c"),
4060 quote (files
[1]), checkonly
);
4064 static char opts
[] = {0, 'o', 0};
4065 opts
[0] = checkonly
;
4066 incompatible_options (opts
);
4069 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
4070 input is not properly sorted. */
4071 exit (check (files
[0], checkonly
) ? EXIT_SUCCESS
: SORT_OUT_OF_ORDER
);
4076 struct sortfile
*sortfiles
= xcalloc (nfiles
, sizeof *sortfiles
);
4079 for (i
= 0; i
< nfiles
; ++i
)
4080 sortfiles
[i
].name
= files
[i
];
4082 merge (sortfiles
, 0, nfiles
, outfile
);
4083 IF_LINT (free (sortfiles
));
4086 sort (files
, nfiles
, outfile
);
4088 if (have_read_stdin
&& fclose (stdin
) == EOF
)
4089 die (_("close failed"), "-");
4091 exit (EXIT_SUCCESS
);