1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 1991-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
26 #include <sys/types.h>
31 #include "long-options.h"
33 #include "hard-locale.h"
37 /* The official name of this program (e.g., no `g' prefix). */
38 #define PROGRAM_NAME "sort"
40 #define AUTHORS "Mike Haertel"
42 #if defined ENABLE_NLS && HAVE_LANGINFO_H
43 # include <langinfo.h>
46 #if HAVE_PATHCONF && defined _PC_NAME_MAX
47 # define NAME_MAX_IN_DIR(Dir) pathconf (Dir, _PC_NAME_MAX)
49 # define NAME_MAX_IN_DIR(Dir) 255
58 /* Undefine, to avoid warning about redefinition on some systems. */
60 #define min(a, b) ((a) < (b) ? (a) : (b))
62 #define max(a, b) ((a) > (b) ? (a) : (b))
64 #define UCHAR_LIM (UCHAR_MAX + 1)
65 #define UCHAR(c) ((unsigned char) (c))
67 #ifndef DEFAULT_TMPDIR
68 # define DEFAULT_TMPDIR "/tmp"
71 /* Use this as exit status in case of error, not EXIT_FAILURE. This
72 is necessary because EXIT_FAILURE is usually 1 and POSIX requires
73 that sort exit with status 1 IFF invoked with -c and the input is
74 not properly sorted. Any other irregular exit must exit with a
75 status code greater than 1. */
76 #define SORT_FAILURE 2
78 #define C_DECIMAL_POINT '.'
79 #define NEGATION_SIGN '-'
80 #define NUMERIC_ZERO '0'
84 static char decimal_point
;
85 static int th_sep
; /* if CHAR_MAX + 1, then there is no thousands separator */
87 /* Nonzero if the corresponding locales are hard. */
88 static int hard_LC_COLLATE
;
89 static int hard_LC_CTYPE
;
91 static int hard_LC_TIME
;
94 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)
98 # define decimal_point C_DECIMAL_POINT
99 # define IS_THOUSANDS_SEP(x) 0
103 /* The kind of blanks for '-b' to skip in various options. */
104 enum blanktype
{ bl_start
, bl_end
, bl_both
};
106 /* The character marking end of line. Default to \n. */
109 /* Lines are held in core as counted strings. */
112 char *text
; /* Text of the line. */
113 int length
; /* Length including final newline. */
114 char *keybeg
; /* Start of first key. */
115 char *keylim
; /* Limit of first key. */
118 /* Arrays of lines. */
121 struct line
*lines
; /* Dynamically allocated array of lines. */
122 int used
; /* Number of slots used. */
123 int alloc
; /* Number of slots allocated. */
124 int limit
; /* Max number of slots to allocate. */
130 char *buf
; /* Dynamically allocated buffer. */
131 int used
; /* Number of bytes used. */
132 int alloc
; /* Number of bytes allocated. */
133 int left
; /* Number of bytes left after line parsing. */
138 int sword
; /* Zero-origin 'word' to start at. */
139 int schar
; /* Additional characters to skip. */
140 int skipsblanks
; /* Skip leading white space at start. */
141 int eword
; /* Zero-origin first word after field. */
142 int echar
; /* Additional characters in field. */
143 int skipeblanks
; /* Skip trailing white space at finish. */
144 int *ignore
; /* Boolean array of characters to ignore. */
145 char *translate
; /* Translation applied to characters. */
146 int numeric
; /* Flag for numeric comparison. Handle
147 strings of digits with optional decimal
148 point, but no exponential notation. */
149 int general_numeric
; /* Flag for general, numeric comparison.
150 Handle numbers in exponential notation. */
151 int month
; /* Flag for comparison by month name. */
152 int reverse
; /* Reverse the sense of comparison. */
153 struct keyfield
*next
; /* Next keyfield to try. */
162 /* The name this program was run with. */
165 /* Table of white space. */
166 static int blanks
[UCHAR_LIM
];
168 /* Table of non-printing characters. */
169 static int nonprinting
[UCHAR_LIM
];
171 /* Table of non-dictionary characters (not letters, digits, or blanks). */
172 static int nondictionary
[UCHAR_LIM
];
174 /* Translation table folding lower case to upper.
175 FIXME: This doesn't work with multibyte character sets. */
176 static char fold_toupper
[UCHAR_LIM
];
178 #define MONTHS_PER_YEAR 12
180 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
181 # define MONTHTAB_CONST /* empty */
183 # define MONTHTAB_CONST const
186 /* Table mapping month names to integers.
187 Alphabetic order allows binary search. */
188 static MONTHTAB_CONST
struct month monthtab
[] =
204 /* During the merge phase, the number of files to merge at once. */
207 /* Initial buffer size for in core sorting. Will not grow unless a
208 line longer than this is seen. */
209 static int sortalloc
= 512 * 1024 + 1;
211 /* Initial buffer size for in core merge buffers. Bear in mind that
212 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
213 static int mergealloc
= 16 * 1024 + 1;
215 /* Guess of average line length. */
216 static int linelength
= 30;
218 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
219 #define LINEALLOC (256 * 1024)
221 /* Directory in which any temporary files are to be created. */
222 static char *temp_dir
;
224 /* Flag to reverse the order of all comparisons. */
227 /* Flag for stable sort. This turns off the last ditch bytewise
228 comparison of lines, and instead leaves lines in the same order
229 they were read if all keys compare equal. */
232 /* Tab character separating fields. If NUL, then fields are separated
233 by the empty string between a non-whitespace character and a whitespace
237 /* Flag to remove consecutive duplicate lines from the output.
238 Only the last of a sequence of equal lines will be output. */
241 /* Nonzero if any of the input files are the standard input. */
242 static int have_read_stdin
;
244 /* Lists of key field comparisons to be tried. */
245 static struct keyfield keyhead
;
251 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
256 Usage: %s [OPTION]... [FILE]...\n\
260 Write sorted concatenation of all FILE(s) to standard output.\n\
262 +POS1 [-POS2] start a key at POS1, end it *before* POS2 (obsolescent)\n\
263 field numbers and character offsets are numbered\n\
264 starting with zero (contrast with the -k option)\n\
265 -b ignore leading blanks in sort fields or keys\n\
266 -c check if given files already sorted, do not sort\n\
267 -d consider only [a-zA-Z0-9 ] characters in keys\n\
268 -f fold lower case to upper case characters in keys\n\
269 -g compare according to general numerical value, imply -b\n\
270 -i consider only [\\040-\\0176] characters in keys\n\
271 -k POS1[,POS2] start a key at POS1, end it *at* POS2\n\
272 field numbers and character offsets are numbered\n\
273 starting with one (contrast with zero-based +POS form)\n\
274 -m merge already sorted files, do not sort\n\
275 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
276 -n compare according to string numerical value, imply -b\n\
277 -o FILE write result on FILE instead of standard output\n\
278 -r reverse the result of comparisons\n\
279 -s stabilize sort by disabling last resort comparison\n\
280 -t SEP use SEParator instead of non- to whitespace transition\n\
281 -T DIRECTORY use DIRECTORY for temporary files, not $TMPDIR or %s\n\
282 -u with -c, check for strict ordering;\n\
283 with -m, only output the first of an equal sequence\n\
284 -z end lines with 0 byte, not newline, for find -print0\n\
285 --help display this help and exit\n\
286 --version output version information and exit\n\
291 POS is F[.C][OPTS], where F is the field number and C the character position\n\
292 in the field, both counted from one with -k, from zero with the obsolescent\n\
293 form. OPTS is made up of one or more of Mbdfinr; this effectively disables\n\
294 global -Mbdfinr settings for that key. If no key is given, use the entire\n\
295 line as the key. With no FILE, or when FILE is -, read standard input.\n\
298 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
300 /* Don't use EXIT_FAILURE here in case it is defined to be 1.
301 POSIX requires that sort return 1 IFF invoked with -c and
302 the input is not properly sorted. */
303 assert (status
== 0 || status
== SORT_FAILURE
);
307 /* The list of temporary files. */
308 static struct tempnode
311 struct tempnode
*next
;
314 /* Clean up any remaining temporary files. */
319 struct tempnode
*node
;
321 for (node
= temphead
.next
; node
; node
= node
->next
)
326 xtmpfopen (const char *file
)
331 /* Open temporary file exclusively, to foil a common
332 denial-of-service attack. */
333 fd
= open (file
, O_WRONLY
| O_CREAT
| O_TRUNC
| O_EXCL
, 0600);
334 if (fd
< 0 || (fp
= fdopen (fd
, "w")) == NULL
)
336 error (0, errno
, "%s", file
);
345 xfopen (const char *file
, const char *how
)
349 if (STREQ (file
, "-"))
355 if ((fp
= fopen (file
, how
)) == NULL
)
357 error (0, errno
, "%s", file
);
373 /* Allow reading stdin from tty more than once. */
377 else if (fp
== stdout
)
379 if (fflush (fp
) != 0)
381 error (0, errno
, _("flushing file"));
388 if (fclose (fp
) != 0)
390 error (0, errno
, _("error closing file"));
398 write_bytes (const char *buf
, size_t n_bytes
, FILE *fp
)
400 if (fwrite (buf
, 1, n_bytes
, fp
) != n_bytes
)
402 error (0, errno
, _("write error"));
408 /* Return a name for a temporary file. */
413 static unsigned int seq
;
414 int len
= strlen (temp_dir
);
415 char *name
= xmalloc (len
+ 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
416 int long_file_names
= NAME_MAX_IN_DIR (temp_dir
) > 12;
417 struct tempnode
*node
;
419 /* If long filenames aren't supported, we cannot use filenames
420 longer than 8+3 and still assume they are unique. */
423 "%s%ssort%5.5d%5.5d",
425 (len
&& temp_dir
[len
- 1] != '/') ? "/" : "",
426 (unsigned int) getpid () & 0xffff, seq
);
428 sprintf (name
, "%s%ss%5.5d%2.2d.%3.3d",
430 (len
&& temp_dir
[len
- 1] != '/') ? "/" : "",
431 (unsigned int) getpid () & 0xffff, seq
/ 1000, seq
% 1000);
435 /* Make sure that SEQ's value fits in 5 digits if temp_dir is on
436 an 8.3 filesystem. */
437 if (!long_file_names
&& seq
>= 100000)
440 node
= (struct tempnode
*) xmalloc (sizeof (struct tempnode
));
442 node
->next
= temphead
.next
;
443 temphead
.next
= node
;
447 /* Search through the list of temporary files for NAME;
448 remove it if it is found on the list. */
451 zaptemp (const char *name
)
453 struct tempnode
*node
, *temp
;
455 for (node
= &temphead
; node
->next
; node
= node
->next
)
456 if (STREQ (name
, node
->next
->name
))
463 node
->next
= temp
->next
;
464 free ((char *) temp
);
471 struct_month_cmp (const void *m1
, const void *m2
)
473 return strcmp (((const struct month
*) m1
)->name
,
474 ((const struct month
*) m2
)->name
);
479 /* Initialize the character class tables. */
486 for (i
= 0; i
< UCHAR_LIM
; ++i
)
492 if (!ISALNUM (i
) && !ISBLANK (i
))
493 nondictionary
[i
] = 1;
495 fold_toupper
[i
] = toupper (i
);
500 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
501 /* If we're not in the "C" locale, read different names for months. */
504 for (i
= 0; i
< MONTHS_PER_YEAR
; i
++)
511 s
= (char *) nl_langinfo (ABMON_1
+ i
);
513 monthtab
[i
].name
= name
= (char *) xmalloc (s_len
+ 1);
514 monthtab
[i
].val
= i
+ 1;
516 for (j
= 0; j
< s_len
; j
++)
517 name
[j
] = fold_toupper
[UCHAR (s
[j
])];
520 qsort ((void *) monthtab
, MONTHS_PER_YEAR
,
521 sizeof (struct month
), struct_month_cmp
);
526 /* Initialize BUF, allocating ALLOC bytes initially. */
529 initbuf (struct buffer
*buf
, int alloc
)
532 buf
->buf
= xmalloc (buf
->alloc
);
533 buf
->used
= buf
->left
= 0;
536 /* Fill BUF reading from FP, moving buf->left bytes from the end
537 of buf->buf to the beginning first. If EOF is reached and the
538 file wasn't terminated by a newline, supply one. Always leave
539 at least one unused byte at the end. Return a count of bytes
543 fillbuf (struct buffer
*buf
, FILE *fp
)
547 memmove (buf
->buf
, buf
->buf
+ buf
->used
- buf
->left
, buf
->left
);
548 buf
->used
= buf
->left
;
550 while (!feof (fp
) && (buf
->used
== 0
551 || !memchr (buf
->buf
, eolchar
, buf
->used
)))
553 if (buf
->used
== buf
->alloc
- 1)
555 buf
->alloc
= buf
->alloc
* 2 - 1;
556 buf
->buf
= xrealloc (buf
->buf
, buf
->alloc
);
558 cc
= fread (buf
->buf
+ buf
->used
, 1, buf
->alloc
- 1 - buf
->used
, fp
);
561 error (0, errno
, _("read error"));
568 if (feof (fp
) && buf
->used
&& buf
->buf
[buf
->used
- 1] != eolchar
)
570 if (buf
->used
== buf
->alloc
- 1)
572 buf
->alloc
= buf
->alloc
* 2 - 1;
573 buf
->buf
= xrealloc (buf
->buf
, buf
->alloc
);
575 buf
->buf
[buf
->used
++] = eolchar
;
581 /* Initialize LINES, allocating space for ALLOC lines initially.
582 LIMIT is the maximum possible number of lines to allocate space
586 initlines (struct lines
*lines
, int alloc
, int limit
)
588 lines
->alloc
= alloc
;
589 lines
->lines
= (struct line
*) xmalloc (lines
->alloc
* sizeof (struct line
));
591 lines
->limit
= limit
;
594 /* Return a pointer to the first character of the field specified
598 begfield (const struct line
*line
, const struct keyfield
*key
)
600 register char *ptr
= line
->text
, *lim
= ptr
+ line
->length
;
601 register int sword
= key
->sword
, schar
= key
->schar
;
604 while (ptr
< lim
&& sword
--)
606 while (ptr
< lim
&& *ptr
!= tab
)
612 while (ptr
< lim
&& sword
--)
614 while (ptr
< lim
&& blanks
[UCHAR (*ptr
)])
616 while (ptr
< lim
&& !blanks
[UCHAR (*ptr
)])
620 if (key
->skipsblanks
)
621 while (ptr
< lim
&& blanks
[UCHAR (*ptr
)])
624 if (ptr
+ schar
<= lim
)
632 /* Return the limit of (a pointer to the first character after) the field
633 in LINE specified by KEY. */
636 limfield (const struct line
*line
, const struct keyfield
*key
)
638 register char *ptr
= line
->text
, *lim
= ptr
+ line
->length
;
639 register int eword
= key
->eword
, echar
= key
->echar
;
641 /* Note: from the POSIX spec:
642 The leading field separator itself is included in
643 a field when -t is not used. FIXME: move this comment up... */
645 /* Move PTR past EWORD fields or to one past the last byte on LINE,
646 whichever comes first. If there are more than EWORD fields, leave
647 PTR pointing at the beginning of the field having zero-based index,
648 EWORD. If a delimiter character was specified (via -t), then that
649 `beginning' is the first character following the delimiting TAB.
650 Otherwise, leave PTR pointing at the first `blank' character after
651 the preceding field. */
653 while (ptr
< lim
&& eword
--)
655 while (ptr
< lim
&& *ptr
!= tab
)
657 if (ptr
< lim
&& (eword
|| echar
> 0))
661 while (ptr
< lim
&& eword
--)
663 while (ptr
< lim
&& blanks
[UCHAR (*ptr
)])
665 while (ptr
< lim
&& !blanks
[UCHAR (*ptr
)])
669 #ifdef POSIX_UNSPECIFIED
670 /* The following block of code makes GNU sort incompatible with
671 standard Unix sort, so it's ifdef'd out for now.
672 The POSIX spec isn't clear on how to interpret this.
673 FIXME: request clarification.
675 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
676 Date: Thu, 30 May 96 12:20:41 -0400
678 [...]I believe I've found another bug in `sort'.
683 $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
686 $ /bin/sort +0.6 -0.7 </tmp/sort.in
690 Unix sort produced the answer I expected: sort on the single character
691 in column 6. GNU sort produced different results, because it disagrees
692 on the interpretation of the key-end spec "-M.N". Unix sort reads this
693 as "skip M fields, then N characters"; but GNU sort wants it to mean
694 "skip M fields, then either N characters or the rest of the current
695 field, whichever comes first". This extra clause applies only to
696 key-ends, not key-starts.
699 /* Make LIM point to the end of (one byte past) the current field. */
703 newlim
= memchr (ptr
, tab
, lim
- ptr
);
711 while (newlim
< lim
&& blanks
[UCHAR (*newlim
)])
713 while (newlim
< lim
&& !blanks
[UCHAR (*newlim
)])
719 /* If we're skipping leading blanks, don't start counting characters
720 until after skipping past any leading blanks. */
721 if (key
->skipsblanks
)
722 while (ptr
< lim
&& blanks
[UCHAR (*ptr
)])
725 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
726 if (ptr
+ echar
<= lim
)
737 trim_trailing_blanks (const char *a_start
, char **a_end
)
739 while (*a_end
> a_start
&& blanks
[UCHAR (*(*a_end
- 1))])
743 /* Find the lines in BUF, storing pointers and lengths in LINES. */
746 findlines (struct buffer
*buf
, struct lines
*lines
)
748 register char *beg
= buf
->buf
, *lim
= buf
->buf
+ buf
->used
, *ptr
;
749 struct keyfield
*key
= keyhead
.next
;
753 while (beg
< lim
&& (ptr
= memchr (beg
, eolchar
, lim
- beg
))
754 && lines
->used
< lines
->limit
)
756 if (lines
->used
== lines
->alloc
)
759 lines
->lines
= (struct line
*)
760 xrealloc ((char *) lines
->lines
,
761 lines
->alloc
* sizeof (struct line
));
765 lines
->lines
[lines
->used
].text
= beg
;
766 lines
->lines
[lines
->used
].length
= ptr
- beg
;
768 /* Precompute the position of the first key for efficiency. */
772 lines
->lines
[lines
->used
].keylim
=
773 limfield (&lines
->lines
[lines
->used
], key
);
775 lines
->lines
[lines
->used
].keylim
= ptr
;
778 lines
->lines
[lines
->used
].keybeg
=
779 begfield (&lines
->lines
[lines
->used
], key
);
782 if (key
->skipsblanks
)
783 while (blanks
[UCHAR (*beg
)])
785 lines
->lines
[lines
->used
].keybeg
= beg
;
787 if (key
->skipeblanks
)
789 trim_trailing_blanks (lines
->lines
[lines
->used
].keybeg
,
790 &lines
->lines
[lines
->used
].keylim
);
795 lines
->lines
[lines
->used
].keybeg
= 0;
796 lines
->lines
[lines
->used
].keylim
= 0;
803 buf
->left
= lim
- beg
;
806 /* Compare strings A and B containing decimal fractions < 1. Each string
807 should begin with a decimal point followed immediately by the digits
808 of the fraction. Strings not of this form are considered to be zero. */
810 /* The goal here, is to take two numbers a and b... compare these
811 in parallel. Instead of converting each, and then comparing the
812 outcome. Most likely stopping the comparison before the conversion
813 is complete. The algorithm used, in the old sort:
815 Algorithm: fraccompare
816 Action : compare two decimal fractions
817 accepts : char *a, char *b
818 returns : -1 if a<b, 0 if a=b, 1 if a>b.
821 if *a == decimal_point AND *b == decimal_point
822 find first character different in a and b.
823 if both are digits, return the difference *a - *b.
826 if digit return 1, else 0
829 if digit return -1, else 0
830 if *a is a decimal_point
831 skip past decimal_point and zeros
832 if digit return 1, else 0
833 if *b is a decimal_point
834 skip past decimal_point and zeros
835 if digit return -1, else 0
839 fraccompare (register const char *a
, register const char *b
)
841 if (*a
== decimal_point
&& *b
== decimal_point
)
846 if (ISDIGIT (*a
) && ISDIGIT (*b
))
849 goto a_trailing_nonzero
;
851 goto b_trailing_nonzero
;
854 else if (*a
++ == decimal_point
)
857 while (*a
== NUMERIC_ZERO
)
861 else if (*b
++ == decimal_point
)
864 while (*b
== NUMERIC_ZERO
)
866 return - ISDIGIT (*b
);
871 /* Compare strings A and B as numbers without explicitly converting them to
872 machine numbers. Comparatively slow for short strings, but asymptotically
876 numcompare (register const char *a
, register const char *b
)
878 register int tmpa
, tmpb
, loga
, logb
, tmp
;
883 while (blanks
[UCHAR (tmpa
)])
885 while (blanks
[UCHAR (tmpb
)])
888 if (tmpa
== NEGATION_SIGN
)
892 while (tmpa
== NUMERIC_ZERO
|| IS_THOUSANDS_SEP (tmpa
));
893 if (tmpb
!= NEGATION_SIGN
)
895 if (tmpa
== decimal_point
)
898 while (tmpa
== NUMERIC_ZERO
);
901 while (tmpb
== NUMERIC_ZERO
|| IS_THOUSANDS_SEP (tmpb
))
903 if (tmpb
== decimal_point
)
906 while (tmpb
== NUMERIC_ZERO
);
913 while (tmpb
== NUMERIC_ZERO
|| IS_THOUSANDS_SEP (tmpb
));
915 while (tmpa
== tmpb
&& ISDIGIT (tmpa
))
919 while (IS_THOUSANDS_SEP (tmpa
));
922 while (IS_THOUSANDS_SEP (tmpb
));
925 if ((tmpa
== decimal_point
&& !ISDIGIT (tmpb
))
926 || (tmpb
== decimal_point
&& !ISDIGIT (tmpa
)))
927 return -fraccompare (a
, b
);
931 for (loga
= 0; ISDIGIT (tmpa
); ++loga
)
934 while (IS_THOUSANDS_SEP (tmpa
));
936 for (logb
= 0; ISDIGIT (tmpb
); ++logb
)
939 while (IS_THOUSANDS_SEP (tmpb
));
941 if (logb
- loga
!= 0)
949 else if (tmpb
== NEGATION_SIGN
)
953 while (tmpb
== NUMERIC_ZERO
|| IS_THOUSANDS_SEP (tmpb
));
954 if (tmpb
== decimal_point
)
957 while (tmpb
== NUMERIC_ZERO
);
960 while (tmpa
== NUMERIC_ZERO
|| IS_THOUSANDS_SEP (tmpa
))
962 if (tmpa
== decimal_point
)
965 while (tmpa
== NUMERIC_ZERO
);
972 while (tmpa
== NUMERIC_ZERO
|| IS_THOUSANDS_SEP (tmpa
))
974 while (tmpb
== NUMERIC_ZERO
|| IS_THOUSANDS_SEP (tmpb
))
977 while (tmpa
== tmpb
&& ISDIGIT (tmpa
))
981 while (IS_THOUSANDS_SEP (tmpa
));
984 while (IS_THOUSANDS_SEP (tmpb
));
987 if ((tmpa
== decimal_point
&& !ISDIGIT (tmpb
))
988 || (tmpb
== decimal_point
&& !ISDIGIT (tmpa
)))
989 return fraccompare (a
, b
);
993 for (loga
= 0; ISDIGIT (tmpa
); ++loga
)
996 while (IS_THOUSANDS_SEP (tmpa
));
998 for (logb
= 0; ISDIGIT (tmpb
); ++logb
)
1001 while (IS_THOUSANDS_SEP (tmpb
));
1003 if (loga
- logb
!= 0)
1014 general_numcompare (const char *sa
, const char *sb
)
1016 /* FIXME: add option to warn about failed conversions. */
1017 /* FIXME: maybe add option to try expensive FP conversion
1018 only if A and B can't be compared more cheaply/accurately. */
1022 double a
= strtod (sa
, &ea
);
1023 double b
= strtod (sb
, &eb
);
1025 /* Put conversion errors at the start of the collating sequence. */
1027 return sb
== eb
? 0 : -1;
1031 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1032 conversion errors but before numbers; sort them by internal
1033 bit-pattern, for lack of a more portable alternative. */
1039 : memcmp ((char *) &a
, (char *) &b
, sizeof a
));
1042 /* Return an integer in 1..12 of the month name S with length LEN.
1043 Return 0 if the name in S is not recognized. */
1046 getmonth (const char *s
, int len
)
1049 register int i
, lo
= 0, hi
= MONTHS_PER_YEAR
, result
;
1051 while (len
> 0 && blanks
[UCHAR (*s
)])
1060 month
= (char *) alloca (len
+ 1);
1061 for (i
= 0; i
< len
; ++i
)
1062 month
[i
] = fold_toupper
[UCHAR (s
[i
])];
1063 while (blanks
[UCHAR (month
[i
- 1])])
1069 int ix
= (lo
+ hi
) / 2;
1071 if (strncmp (month
, monthtab
[ix
].name
, strlen (monthtab
[ix
].name
)) < 0)
1076 while (hi
- lo
> 1);
1078 result
= (!strncmp (month
, monthtab
[lo
].name
, strlen (monthtab
[lo
].name
))
1079 ? monthtab
[lo
].val
: 0);
1084 /* Compare two lines A and B trying every key in sequence until there
1085 are no more keys or a difference is found. */
1088 keycompare (const struct line
*a
, const struct line
*b
)
1090 register char *texta
, *textb
, *lima
, *limb
;
1091 register unsigned char *translate
;
1092 register int *ignore
;
1093 struct keyfield
*key
;
1094 int diff
= 0, iter
= 0, lena
, lenb
;
1096 for (key
= keyhead
.next
; key
; key
= key
->next
, ++iter
)
1098 int comparable_lengths
= 1;
1100 ignore
= key
->ignore
;
1101 translate
= (unsigned char *) key
->translate
;
1103 /* Find the beginning and limit of each field. */
1104 if (iter
|| a
->keybeg
== NULL
|| b
->keybeg
== NULL
)
1106 if (key
->eword
>= 0)
1107 lima
= limfield (a
, key
), limb
= limfield (b
, key
);
1109 lima
= a
->text
+ a
->length
, limb
= b
->text
+ b
->length
;
1111 if (key
->sword
>= 0)
1112 texta
= begfield (a
, key
), textb
= begfield (b
, key
);
1115 texta
= a
->text
, textb
= b
->text
;
1116 if (key
->skipsblanks
)
1118 while (texta
< lima
&& blanks
[UCHAR (*texta
)])
1120 while (textb
< limb
&& blanks
[UCHAR (*textb
)])
1127 /* For the first iteration only, the key positions have
1128 been precomputed for us. */
1129 texta
= a
->keybeg
, lima
= a
->keylim
;
1130 textb
= b
->keybeg
, limb
= b
->keylim
;
1133 /* Find the lengths. */
1134 lena
= lima
- texta
, lenb
= limb
- textb
;
1140 if (key
->skipeblanks
)
1142 char *a_end
= texta
+ lena
;
1143 char *b_end
= textb
+ lenb
;
1144 trim_trailing_blanks (texta
, &a_end
);
1145 trim_trailing_blanks (textb
, &b_end
);
1146 lena
= a_end
- texta
;
1147 lenb
= b_end
- textb
;
1150 /* Actually compare the fields. */
1151 if (key
->numeric
| key
->general_numeric
)
1155 /* If the fields are adjacent, adjust the end of the earlier
1156 field back by 1 byte, since we temporarily modify the
1157 byte after the field during comparison. This can't
1158 change a numeric comparison, since the byte is a newline.
1159 If the earlier field is empty, adjust its start as well. */
1161 texta
-= texta
== lima
--;
1163 textb
-= textb
== limb
--;
1165 savea
= *lima
, saveb
= *limb
;
1166 *lima
= *limb
= '\0';
1167 diff
= ((key
->numeric
? numcompare
: general_numcompare
)
1169 *lima
= savea
, *limb
= saveb
;
1171 return key
->reverse
? -diff
: diff
;
1174 else if (key
->month
)
1176 diff
= getmonth (texta
, lena
) - getmonth (textb
, lenb
);
1178 return key
->reverse
? -diff
: diff
;
1183 /* Sorting like this may become slow, so in a simple locale the user
1184 can select a faster sort that is similar to ascii sort */
1185 else if (hard_LC_COLLATE
| hard_LC_CTYPE
)
1187 if (ignore
|| translate
)
1189 char *copy_a
= (char *) alloca (lena
+ 1);
1190 char *copy_b
= (char *) alloca (lenb
+ 1);
1191 int new_len_a
, new_len_b
, i
;
1193 /* Ignore and/or translate chars before comparing. */
1194 for (new_len_a
= new_len_b
= i
= 0; i
< max (lena
, lenb
); i
++)
1198 copy_a
[new_len_a
] = (translate
1199 ? translate
[UCHAR (texta
[i
])]
1201 if (!ignore
|| !ignore
[UCHAR (texta
[i
])])
1206 copy_b
[new_len_b
] = (translate
1207 ? translate
[UCHAR (textb
[i
])]
1209 if (!ignore
|| !ignore
[UCHAR (textb
[i
])])
1214 diff
= memcoll (copy_a
, new_len_a
, copy_b
, new_len_b
);
1218 diff
= memcoll (texta
, lena
, textb
, lenb
);
1222 return key
->reverse
? -diff
: diff
;
1227 else if (ignore
&& translate
)
1229 #define CMP_WITH_IGNORE(A, B) \
1232 while (texta < lima && textb < limb) \
1234 while (texta < lima && ignore[UCHAR (*texta)]) \
1236 while (textb < limb && ignore[UCHAR (*textb)]) \
1238 if (texta < lima && textb < limb) \
1242 diff = UCHAR (A) - UCHAR (B); \
1249 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1251 else if (texta < lima && textb == limb \
1252 && !ignore[UCHAR (*texta)]) \
1258 while (texta < lima && ignore[UCHAR (*texta)]) \
1260 while (textb < limb && ignore[UCHAR (*textb)]) \
1263 if (texta == lima && textb < limb) \
1265 else if (texta < lima && textb == limb) \
1268 /* Relative lengths are meaningless if characters were ignored. \
1269 Handling this case here avoids what might be an invalid length \
1270 comparison below. */ \
1271 if (diff == 0 && texta == lima && textb == limb) \
1272 comparable_lengths = 0; \
1276 CMP_WITH_IGNORE (translate
[UCHAR (*texta
)], translate
[UCHAR (*textb
)]);
1278 CMP_WITH_IGNORE (UCHAR (*texta
), UCHAR (*textb
));
1280 while (texta
< lima
&& textb
< limb
)
1282 if (translate
[UCHAR (*texta
++)] != translate
[UCHAR (*textb
++)])
1284 diff
= (UCHAR (translate
[UCHAR (*--texta
)])
1285 - UCHAR (translate
[UCHAR (*--textb
)]));
1292 if (hard_LC_COLLATE
)
1294 /* Ignore any length difference if the localized comparison
1295 says the strings are equal. */
1296 comparable_lengths
= 0;
1297 diff
= memcoll (texta
, lena
, textb
, lenb
);
1302 diff
= memcmp (texta
, textb
, min (lena
, lenb
));
1307 return key
->reverse
? -diff
: diff
;
1308 if (comparable_lengths
&& (diff
= lena
- lenb
) != 0)
1309 return key
->reverse
? -diff
: diff
;
1315 /* Compare two lines A and B, returning negative, zero, or positive
1316 depending on whether A compares less than, equal to, or greater than B. */
1319 compare (register const struct line
*a
, register const struct line
*b
)
1321 int diff
, tmpa
, tmpb
;
1323 /* First try to compare on the specified keys (if any).
1324 The only two cases with no key at all are unadorned sort,
1325 and unadorned sort -r. */
1328 diff
= keycompare (a
, b
);
1329 if (diff
!= 0 || unique
|| stable
)
1336 /* If the keys all compare equal (or no keys were specified)
1337 fall through to the default byte-by-byte comparison. */
1338 tmpa
= a
->length
, tmpb
= b
->length
;
1341 if (hard_LC_COLLATE
)
1343 diff
= memcoll (a
->text
, tmpa
, b
->text
, tmpb
);
1345 return reverse
? -diff
: diff
;
1349 diff
= UCHAR (a
->text
[0]) - UCHAR (b
->text
[0]);
1352 diff
= memcmp (a
->text
, b
->text
, min (tmpa
, tmpb
));
1357 return reverse
? -diff
: diff
;
1360 /* Check that the lines read from the given FP come in order. Print a
1361 diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1362 the line number of the first out-of-order line (counting from 1) if they
1363 are not in order. Otherwise, print no diagnostic and return zero. */
1366 checkfp (FILE *fp
, const char *file_name
)
1368 struct buffer buf
; /* Input buffer. */
1369 struct lines lines
; /* Lines scanned from the buffer. */
1370 struct line temp
; /* Copy of previous line. */
1371 int cc
; /* Character count. */
1373 int line_number
= 1;
1374 struct line
*disorder_line
;
1375 int disorder_line_number
= 0;
1377 #ifdef lint /* Suppress `used before initialized' warning. */
1378 disorder_line
= NULL
;
1381 initbuf (&buf
, mergealloc
);
1382 initlines (&lines
, mergealloc
/ linelength
+ 1,
1383 LINEALLOC
/ ((NMERGE
+ NMERGE
) * sizeof (struct line
)));
1385 temp
.text
= xmalloc (alloc
);
1387 cc
= fillbuf (&buf
, fp
);
1391 findlines (&buf
, &lines
);
1395 struct line
*prev_line
; /* Pointer to previous line. */
1396 int cmp
; /* Result of calling compare. */
1399 /* Compare each line in the buffer with its successor. */
1400 for (i
= 0; i
< lines
.used
- 1; ++i
)
1402 cmp
= compare (&lines
.lines
[i
], &lines
.lines
[i
+ 1]);
1403 if ((unique
&& cmp
>= 0) || (cmp
> 0))
1405 disorder_line
= &lines
.lines
[i
+ 1];
1406 disorder_line_number
= line_number
+ i
+ 1;
1411 line_number
+= lines
.used
;
1413 /* Save the last line of the buffer and refill the buffer. */
1414 prev_line
= lines
.lines
+ (lines
.used
- 1);
1415 if (prev_line
->length
+ 1 > alloc
)
1421 while (alloc
< prev_line
->length
+ 1);
1422 temp
.text
= xrealloc (temp
.text
, alloc
);
1424 assert (prev_line
->length
+ 1 <= alloc
);
1425 memcpy (temp
.text
, prev_line
->text
, prev_line
->length
);
1426 temp
.length
= prev_line
->length
;
1427 temp
.keybeg
= temp
.text
+ (prev_line
->keybeg
- prev_line
->text
);
1428 temp
.keylim
= temp
.text
+ (prev_line
->keylim
- prev_line
->text
);
1430 cc
= fillbuf (&buf
, fp
);
1434 findlines (&buf
, &lines
);
1435 /* Make sure the line saved from the old buffer contents is
1436 less than or equal to the first line of the new buffer. */
1437 cmp
= compare (&temp
, &lines
.lines
[0]);
1438 if ((unique
&& cmp
>= 0) || (cmp
> 0))
1440 disorder_line
= &lines
.lines
[0];
1441 disorder_line_number
= line_number
;
1449 if (disorder_line_number
)
1451 fprintf (stderr
, _("%s: %s:%d: disorder: "), program_name
, file_name
,
1452 disorder_line_number
);
1453 write_bytes (disorder_line
->text
, disorder_line
->length
, stderr
);
1457 free ((char *) lines
.lines
);
1459 return disorder_line_number
;
1462 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1463 Close FPS before returning. */
1466 mergefps (FILE **fps
, register int nfps
, FILE *ofp
)
1468 struct buffer buffer
[NMERGE
]; /* Input buffers for each file. */
1469 struct lines lines
[NMERGE
]; /* Line tables for each buffer. */
1470 struct line saved
; /* Saved line for unique check. */
1471 int savedflag
= 0; /* True if there is a saved line. */
1472 int savealloc
; /* Size allocated for the saved line. */
1473 int cur
[NMERGE
]; /* Current line in each line table. */
1474 int ord
[NMERGE
]; /* Table representing a permutation of fps,
1475 such that lines[ord[0]].lines[cur[ord[0]]]
1476 is the smallest line and will be next
1478 register int i
, j
, t
;
1480 #ifdef lint /* Suppress `used before initialized' warning. */
1484 /* Allocate space for a saved line if necessary. */
1487 savealloc
= linelength
;
1488 saved
.text
= xmalloc (savealloc
);
1491 /* Read initial lines from each input file. */
1492 for (i
= 0; i
< nfps
; ++i
)
1494 initbuf (&buffer
[i
], mergealloc
);
1495 /* If a file is empty, eliminate it from future consideration. */
1496 while (i
< nfps
&& !fillbuf (&buffer
[i
], fps
[i
]))
1500 for (j
= i
; j
< nfps
; ++j
)
1501 fps
[j
] = fps
[j
+ 1];
1504 free (buffer
[i
].buf
);
1507 initlines (&lines
[i
], mergealloc
/ linelength
+ 1,
1508 LINEALLOC
/ ((NMERGE
+ NMERGE
) * sizeof (struct line
)));
1509 findlines (&buffer
[i
], &lines
[i
]);
1514 /* Set up the ord table according to comparisons among input lines.
1515 Since this only reorders two items if one is strictly greater than
1516 the other, it is stable. */
1517 for (i
= 0; i
< nfps
; ++i
)
1519 for (i
= 1; i
< nfps
; ++i
)
1520 if (compare (&lines
[ord
[i
- 1]].lines
[cur
[ord
[i
- 1]]],
1521 &lines
[ord
[i
]].lines
[cur
[ord
[i
]]]) > 0)
1522 t
= ord
[i
- 1], ord
[i
- 1] = ord
[i
], ord
[i
] = t
, i
= 0;
1524 /* Repeatedly output the smallest line until no input remains. */
1527 /* If uniquified output is turned on, output only the first of
1528 an identical series of lines. */
1531 if (savedflag
&& compare (&saved
, &lines
[ord
[0]].lines
[cur
[ord
[0]]]))
1533 write_bytes (saved
.text
, saved
.length
, ofp
);
1538 if (savealloc
< lines
[ord
[0]].lines
[cur
[ord
[0]]].length
+ 1)
1540 while (savealloc
< lines
[ord
[0]].lines
[cur
[ord
[0]]].length
+ 1)
1542 saved
.text
= xrealloc (saved
.text
, savealloc
);
1544 saved
.length
= lines
[ord
[0]].lines
[cur
[ord
[0]]].length
;
1545 memcpy (saved
.text
, lines
[ord
[0]].lines
[cur
[ord
[0]]].text
,
1547 if (lines
[ord
[0]].lines
[cur
[ord
[0]]].keybeg
!= NULL
)
1549 saved
.keybeg
= saved
.text
+
1550 (lines
[ord
[0]].lines
[cur
[ord
[0]]].keybeg
1551 - lines
[ord
[0]].lines
[cur
[ord
[0]]].text
);
1553 if (lines
[ord
[0]].lines
[cur
[ord
[0]]].keylim
!= NULL
)
1555 saved
.keylim
= saved
.text
+
1556 (lines
[ord
[0]].lines
[cur
[ord
[0]]].keylim
1557 - lines
[ord
[0]].lines
[cur
[ord
[0]]].text
);
1563 write_bytes (lines
[ord
[0]].lines
[cur
[ord
[0]]].text
,
1564 lines
[ord
[0]].lines
[cur
[ord
[0]]].length
, ofp
);
1566 /* Check if we need to read more lines into core. */
1567 if (++cur
[ord
[0]] == lines
[ord
[0]].used
)
1569 if (fillbuf (&buffer
[ord
[0]], fps
[ord
[0]]))
1571 findlines (&buffer
[ord
[0]], &lines
[ord
[0]]);
1576 /* We reached EOF on fps[ord[0]]. */
1577 for (i
= 1; i
< nfps
; ++i
)
1578 if (ord
[i
] > ord
[0])
1581 xfclose (fps
[ord
[0]]);
1582 free (buffer
[ord
[0]].buf
);
1583 free ((char *) lines
[ord
[0]].lines
);
1584 for (i
= ord
[0]; i
< nfps
; ++i
)
1586 fps
[i
] = fps
[i
+ 1];
1587 buffer
[i
] = buffer
[i
+ 1];
1588 lines
[i
] = lines
[i
+ 1];
1589 cur
[i
] = cur
[i
+ 1];
1591 for (i
= 0; i
< nfps
; ++i
)
1592 ord
[i
] = ord
[i
+ 1];
1597 /* The new line just read in may be larger than other lines
1598 already in core; push it back in the queue until we encounter
1599 a line larger than it. */
1600 for (i
= 1; i
< nfps
; ++i
)
1602 t
= compare (&lines
[ord
[0]].lines
[cur
[ord
[0]]],
1603 &lines
[ord
[i
]].lines
[cur
[ord
[i
]]]);
1605 t
= ord
[0] - ord
[i
];
1610 for (j
= 1; j
< i
; ++j
)
1611 ord
[j
- 1] = ord
[j
];
1615 if (unique
&& savedflag
)
1617 write_bytes (saved
.text
, saved
.length
, ofp
);
1622 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1625 sortlines (struct line
*lines
, int nlines
, struct line
*temp
)
1627 register struct line
*lo
, *hi
, *t
;
1628 register int nlo
, nhi
;
1632 if (compare (&lines
[0], &lines
[1]) > 0)
1635 lines
[0] = lines
[1];
1647 sortlines (lo
, nlo
, temp
);
1650 sortlines (hi
, nhi
, temp
);
1655 if (compare (lo
, hi
) <= 0)
1656 *t
++ = *lo
++, --nlo
;
1658 *t
++ = *hi
++, --nhi
;
1662 for (lo
= lines
, nlo
= nlines
- nhi
, t
= temp
; nlo
; --nlo
)
1666 /* Check that each of the NFILES FILES is ordered.
1667 Return a count of disordered files. */
1670 check (char **files
, int nfiles
)
1672 int i
, disorders
= 0;
1675 for (i
= 0; i
< nfiles
; ++i
)
1677 fp
= xfopen (files
[i
], "r");
1678 if (checkfp (fp
, files
[i
]))
1686 /* Merge NFILES FILES onto OFP. */
1689 merge (char **files
, int nfiles
, FILE *ofp
)
1693 FILE *fps
[NMERGE
], *tfp
;
1695 while (nfiles
> NMERGE
)
1698 for (i
= 0; i
< nfiles
/ NMERGE
; ++i
)
1700 for (j
= 0; j
< NMERGE
; ++j
)
1701 fps
[j
] = xfopen (files
[i
* NMERGE
+ j
], "r");
1702 tfp
= xtmpfopen (temp
= tempname ());
1703 mergefps (fps
, NMERGE
, tfp
);
1705 for (j
= 0; j
< NMERGE
; ++j
)
1706 zaptemp (files
[i
* NMERGE
+ j
]);
1709 for (j
= 0; j
< nfiles
% NMERGE
; ++j
)
1710 fps
[j
] = xfopen (files
[i
* NMERGE
+ j
], "r");
1711 tfp
= xtmpfopen (temp
= tempname ());
1712 mergefps (fps
, nfiles
% NMERGE
, tfp
);
1714 for (j
= 0; j
< nfiles
% NMERGE
; ++j
)
1715 zaptemp (files
[i
* NMERGE
+ j
]);
1720 for (i
= 0; i
< nfiles
; ++i
)
1721 fps
[i
] = xfopen (files
[i
], "r");
1722 mergefps (fps
, i
, ofp
);
1723 for (i
= 0; i
< nfiles
; ++i
)
1727 /* Sort NFILES FILES onto OFP. */
1730 sort (char **files
, int nfiles
, FILE *ofp
)
1737 struct tempnode
*node
;
1738 int n_temp_files
= 0;
1741 initbuf (&buf
, sortalloc
);
1742 initlines (&lines
, sortalloc
/ linelength
+ 1,
1743 LINEALLOC
/ sizeof (struct line
));
1745 tmp
= (struct line
*) xmalloc (ntmp
* sizeof (struct line
));
1749 fp
= xfopen (*files
++, "r");
1750 while (fillbuf (&buf
, fp
))
1752 findlines (&buf
, &lines
);
1753 if (lines
.used
> ntmp
)
1755 while (lines
.used
> ntmp
)
1757 tmp
= (struct line
*)
1758 xrealloc ((char *) tmp
, ntmp
* sizeof (struct line
));
1760 sortlines (lines
.lines
, lines
.used
, tmp
);
1761 if (feof (fp
) && !nfiles
&& !n_temp_files
&& !buf
.left
)
1768 tfp
= xtmpfopen (tempname ());
1770 for (i
= 0; i
< lines
.used
; ++i
)
1771 if (!unique
|| i
== 0
1772 || compare (&lines
.lines
[i
], &lines
.lines
[i
- 1]))
1773 write_bytes (lines
.lines
[i
].text
, lines
.lines
[i
].length
, tfp
);
1781 free ((char *) lines
.lines
);
1782 free ((char *) tmp
);
1786 tempfiles
= (char **) xmalloc (n_temp_files
* sizeof (char *));
1788 for (node
= temphead
.next
; i
> 0; node
= node
->next
)
1789 tempfiles
[--i
] = node
->name
;
1790 merge (tempfiles
, n_temp_files
, ofp
);
1791 free ((char *) tempfiles
);
1795 /* Insert key KEY at the end of the list (`keyhead'). */
1798 insertkey (struct keyfield
*key
)
1800 struct keyfield
*k
= &keyhead
;
1809 badfieldspec (const char *s
)
1811 error (SORT_FAILURE
, 0, _("invalid field specification `%s'"), s
);
1814 /* Handle interrupts and hangups. */
1817 sighandler (int sig
)
1820 struct sigaction sigact
;
1822 sigact
.sa_handler
= SIG_DFL
;
1823 sigemptyset (&sigact
.sa_mask
);
1824 sigact
.sa_flags
= 0;
1825 sigaction (sig
, &sigact
, NULL
);
1826 #else /* !SA_INTERRUPT */
1827 signal (sig
, SIG_DFL
);
1828 #endif /* SA_INTERRUPT */
1830 kill (getpid (), sig
);
1833 /* Set the ordering options for KEY specified in S.
1834 Return the address of the first character in S that
1835 is not a valid ordering option.
1836 BLANKTYPE is the kind of blanks that 'b' should skip. */
1839 set_ordering (register const char *s
, struct keyfield
*key
,
1840 enum blanktype blanktype
)
1847 if (blanktype
== bl_start
|| blanktype
== bl_both
)
1848 key
->skipsblanks
= 1;
1849 if (blanktype
== bl_end
|| blanktype
== bl_both
)
1850 key
->skipeblanks
= 1;
1853 key
->ignore
= nondictionary
;
1856 key
->translate
= fold_toupper
;
1859 key
->general_numeric
= 1;
1862 key
->ignore
= nonprinting
;
1882 key_init (struct keyfield
*key
)
1884 memset (key
, 0, sizeof (*key
));
1889 main (int argc
, char **argv
)
1891 struct keyfield
*key
= NULL
, gkey
;
1894 int checkonly
= 0, mergeonly
= 0, nfiles
= 0;
1895 char *minus
= "-", *outfile
= minus
, **files
, *tmp
;
1898 struct sigaction oldact
, newact
;
1899 #endif /* SA_INTERRUPT */
1901 program_name
= argv
[0];
1902 setlocale (LC_ALL
, "");
1903 bindtextdomain (PACKAGE
, LOCALEDIR
);
1904 textdomain (PACKAGE
);
1908 hard_LC_COLLATE
= hard_locale (LC_COLLATE
);
1909 hard_LC_CTYPE
= hard_locale (LC_CTYPE
);
1910 # if HAVE_NL_LANGINFO
1911 hard_LC_TIME
= hard_locale (LC_TIME
);
1914 /* Let's get locale's representation of the decimal point */
1916 struct lconv
*lconvp
= localeconv ();
1918 /* If the locale doesn't define a decimal point, or if the decimal
1919 point is multibyte, use the C decimal point. We don't support
1920 multibyte decimal points yet. */
1921 decimal_point
= *lconvp
->decimal_point
;
1922 if (! decimal_point
|| lconvp
->decimal_point
[1])
1923 decimal_point
= C_DECIMAL_POINT
;
1925 /* We don't support multibyte thousands separators yet. */
1926 th_sep
= *lconvp
->thousands_sep
;
1927 if (! th_sep
|| lconvp
->thousands_sep
[1])
1928 th_sep
= CHAR_MAX
+ 1;
1933 parse_long_options (argc
, argv
, PROGRAM_NAME
, GNU_PACKAGE
, VERSION
,
1936 have_read_stdin
= 0;
1939 temp_dir
= getenv ("TMPDIR");
1940 if (temp_dir
== NULL
)
1941 temp_dir
= DEFAULT_TMPDIR
;
1943 /* Change the way xmalloc and xrealloc fail. */
1944 xalloc_exit_failure
= SORT_FAILURE
;
1945 xalloc_fail_func
= cleanup
;
1948 newact
.sa_handler
= sighandler
;
1949 sigemptyset (&newact
.sa_mask
);
1950 newact
.sa_flags
= 0;
1952 sigaction (SIGINT
, NULL
, &oldact
);
1953 if (oldact
.sa_handler
!= SIG_IGN
)
1954 sigaction (SIGINT
, &newact
, NULL
);
1955 sigaction (SIGHUP
, NULL
, &oldact
);
1956 if (oldact
.sa_handler
!= SIG_IGN
)
1957 sigaction (SIGHUP
, &newact
, NULL
);
1958 sigaction (SIGPIPE
, NULL
, &oldact
);
1959 if (oldact
.sa_handler
!= SIG_IGN
)
1960 sigaction (SIGPIPE
, &newact
, NULL
);
1961 sigaction (SIGTERM
, NULL
, &oldact
);
1962 if (oldact
.sa_handler
!= SIG_IGN
)
1963 sigaction (SIGTERM
, &newact
, NULL
);
1964 #else /* !SA_INTERRUPT */
1965 if (signal (SIGINT
, SIG_IGN
) != SIG_IGN
)
1966 signal (SIGINT
, sighandler
);
1967 if (signal (SIGHUP
, SIG_IGN
) != SIG_IGN
)
1968 signal (SIGHUP
, sighandler
);
1969 if (signal (SIGPIPE
, SIG_IGN
) != SIG_IGN
)
1970 signal (SIGPIPE
, sighandler
);
1971 if (signal (SIGTERM
, SIG_IGN
) != SIG_IGN
)
1972 signal (SIGTERM
, sighandler
);
1973 #endif /* !SA_INTERRUPT */
1975 gkey
.sword
= gkey
.eword
= -1;
1977 gkey
.translate
= NULL
;
1978 gkey
.numeric
= gkey
.general_numeric
= gkey
.month
= gkey
.reverse
= 0;
1979 gkey
.skipsblanks
= gkey
.skipeblanks
= 0;
1981 files
= (char **) xmalloc (sizeof (char *) * argc
);
1983 for (i
= 1; i
< argc
; ++i
)
1985 if (argv
[i
][0] == '+')
1989 key
= (struct keyfield
*) xmalloc (sizeof (struct keyfield
));
1992 if (! (ISDIGIT (*s
) || (*s
== '.' && ISDIGIT (s
[1]))))
1993 badfieldspec (argv
[i
]);
1994 for (t
= 0; ISDIGIT (*s
); ++s
)
1995 t
= 10 * t
+ *s
- '0';
1998 for (++s
; ISDIGIT (*s
); ++s
)
1999 t2
= 10 * t2
+ *s
- '0';
2007 s
= set_ordering (s
, key
, bl_start
);
2009 badfieldspec (argv
[i
]);
2011 else if (argv
[i
][0] == '-' && argv
[i
][1])
2014 if (ISDIGIT (*s
) || (*s
== '.' && ISDIGIT (s
[1])))
2018 /* Provoke with `sort -9'. */
2019 error (0, 0, _("when using the old-style +POS and -POS \
2020 key specifiers,\nthe +POS specifier must come first"));
2021 usage (SORT_FAILURE
);
2023 for (t
= 0; ISDIGIT (*s
); ++s
)
2024 t
= t
* 10 + *s
- '0';
2027 for (++s
; ISDIGIT (*s
); ++s
)
2028 t2
= t2
* 10 + *s
- '0';
2031 s
= set_ordering (s
, key
, bl_end
);
2033 badfieldspec (argv
[i
]);
2040 s
= set_ordering (s
, &gkey
, bl_both
);
2054 error (SORT_FAILURE
, 0,
2055 _("option `-k' requires an argument"));
2061 key
= (struct keyfield
*)
2062 xmalloc (sizeof (struct keyfield
));
2066 badfieldspec (argv
[i
]);
2067 for (t
= 0; ISDIGIT (*s
); ++s
)
2068 t
= 10 * t
+ *s
- '0';
2071 /* Provoke with `sort -k0' */
2072 error (0, 0, _("the starting field number argument \
2073 to the `-k' option must be positive"));
2074 badfieldspec (argv
[i
]);
2080 if (!ISDIGIT (s
[1]))
2082 /* Provoke with `sort -k1.' */
2083 error (0, 0, _("starting field spec has `.' but \
2084 lacks following character offset"));
2085 badfieldspec (argv
[i
]);
2087 for (++s
; ISDIGIT (*s
); ++s
)
2088 t2
= 10 * t2
+ *s
- '0';
2091 /* Provoke with `sort -k1.0' */
2092 error (0, 0, _("starting field character offset \
2093 argument to the `-k' option\nmust be positive"));
2094 badfieldspec (argv
[i
]);
2105 s
= set_ordering (s
, key
, bl_start
);
2112 badfieldspec (argv
[i
]);
2115 /* Skip over comma. */
2119 /* Provoke with `sort -k1,' */
2120 error (0, 0, _("field specification has `,' but \
2121 lacks following field spec"));
2122 badfieldspec (argv
[i
]);
2125 for (t
= 0; ISDIGIT (*s
); ++s
)
2126 t
= t
* 10 + *s
- '0';
2129 /* Provoke with `sort -k1,0' */
2130 error (0, 0, _("ending field number argument \
2131 to the `-k' option must be positive"));
2132 badfieldspec (argv
[i
]);
2138 if (!ISDIGIT (s
[1]))
2140 /* Provoke with `sort -k1,1.' */
2141 error (0, 0, _("ending field spec has `.' \
2142 but lacks following character offset"));
2143 badfieldspec (argv
[i
]);
2145 for (++s
; ISDIGIT (*s
); ++s
)
2146 t2
= t2
* 10 + *s
- '0';
2150 /* `-k 2,3' is equivalent to `+1 -3'. */
2155 s
= set_ordering (s
, key
, bl_end
);
2157 badfieldspec (argv
[i
]);
2171 error (SORT_FAILURE
, 0,
2172 _("option `-o' requires an argument"));
2174 outfile
= argv
[++i
];
2183 else if (i
< argc
- 1)
2189 error (SORT_FAILURE
, 0,
2190 _("option `-t' requires an argument"));
2198 temp_dir
= argv
[++i
];
2200 error (SORT_FAILURE
, 0,
2201 _("option `-T' requires an argument"));
2212 /* Accept and ignore e.g. -y0 for compatibility with
2216 fprintf (stderr
, _("%s: unrecognized option `-%c'\n"),
2218 usage (SORT_FAILURE
);
2224 else /* Not an option. */
2226 files
[nfiles
++] = argv
[i
];
2234 /* Inheritance of global options to individual keys. */
2235 for (key
= keyhead
.next
; key
; key
= key
->next
)
2236 if (!key
->ignore
&& !key
->translate
&& !key
->skipsblanks
&& !key
->reverse
2237 && !key
->skipeblanks
&& !key
->month
&& !key
->numeric
2238 && !key
->general_numeric
)
2240 key
->ignore
= gkey
.ignore
;
2241 key
->translate
= gkey
.translate
;
2242 key
->skipsblanks
= gkey
.skipsblanks
;
2243 key
->skipeblanks
= gkey
.skipeblanks
;
2244 key
->month
= gkey
.month
;
2245 key
->numeric
= gkey
.numeric
;
2246 key
->general_numeric
= gkey
.general_numeric
;
2247 key
->reverse
= gkey
.reverse
;
2250 if (!keyhead
.next
&& (gkey
.ignore
|| gkey
.translate
|| gkey
.skipsblanks
2251 || gkey
.skipeblanks
|| gkey
.month
|| gkey
.numeric
2252 || gkey
.general_numeric
))
2254 reverse
= gkey
.reverse
;
2264 /* POSIX requires that sort return 1 IFF invoked with -c and the
2265 input is not properly sorted. */
2266 exit (check (files
, nfiles
) == 0 ? 0 : 1);
2269 if (!STREQ (outfile
, "-"))
2271 struct stat outstat
;
2272 if (stat (outfile
, &outstat
) == 0)
2274 /* The following code prevents a race condition when
2275 people use the brain dead shell programming idiom:
2276 cat file | sort -o file
2277 This feature is provided for historical compatibility,
2278 but we strongly discourage ever relying on this in
2279 new shell programs. */
2281 /* Temporarily copy each input file that might be another name
2282 for the output file. When in doubt (e.g. a pipe), copy. */
2283 for (i
= 0; i
< nfiles
; ++i
)
2290 if (S_ISREG (outstat
.st_mode
) && !STREQ (outfile
, files
[i
]))
2293 if ((STREQ (files
[i
], "-")
2294 ? fstat (STDIN_FILENO
, &instat
)
2295 : stat (files
[i
], &instat
)) != 0)
2297 error (0, errno
, "%s", files
[i
]);
2299 exit (SORT_FAILURE
);
2301 if (S_ISREG (instat
.st_mode
) && !SAME_INODE (instat
, outstat
))
2303 /* We know the files are distinct. */
2308 in_fp
= xfopen (files
[i
], "r");
2310 out_fp
= xtmpfopen (tmp
);
2311 /* FIXME: maybe use copy.c(copy) here. */
2312 while ((cc
= fread (buf
, 1, sizeof buf
, in_fp
)) > 0)
2313 write_bytes (buf
, cc
, out_fp
);
2316 error (0, errno
, "%s", files
[i
]);
2318 exit (SORT_FAILURE
);
2324 ofp
= xfopen (outfile
, "w");
2328 /* A non-`-' outfile was specified, but the file doesn't yet exist.
2329 Before opening it for writing (thus creating it), make sure all
2330 of the input files exist. Otherwise, creating the output file
2331 could create an otherwise missing input file, making sort succeed
2332 when it should fail. */
2333 for (i
= 0; i
< nfiles
; ++i
)
2336 if (STREQ (files
[i
], "-"))
2338 if (stat (files
[i
], &sb
))
2340 error (0, errno
, "%s", files
[i
]);
2342 exit (SORT_FAILURE
);
2346 ofp
= xfopen (outfile
, "w");
2355 merge (files
, nfiles
, ofp
);
2357 sort (files
, nfiles
, ofp
);
2360 /* If we wait for the implicit flush on exit, and the parent process
2361 has closed stdout (e.g., exec >&- in a shell), then the output file
2362 winds up empty. I don't understand why. This is under SunOS,
2363 Solaris, Ultrix, and Irix. This premature fflush makes the output
2364 reappear. --karl@cs.umb.edu */
2365 if (fflush (ofp
) < 0)
2366 error (SORT_FAILURE
, errno
, _("%s: write error"), outfile
);
2368 if (have_read_stdin
&& fclose (stdin
) == EOF
)
2369 error (SORT_FAILURE
, errno
, "%s", outfile
);
2370 if (ferror (stdout
) || fclose (stdout
) == EOF
)
2371 error (SORT_FAILURE
, errno
, _("%s: write error"), outfile
);
2373 exit (EXIT_SUCCESS
);