(EXTRA_DIST): Distribute $(TESTS).
[coreutils.git] / src / sort.c
blob90c28567dd7a6b009325996683175ecedd6fefd1
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 91, 92, 93, 94, 95, 1996 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
22 #include <config.h>
24 /* Get isblank from GNU libc. */
25 #define _GNU_SOURCE
27 #include <sys/types.h>
28 #include <signal.h>
29 #include <stdio.h>
30 #define NDEBUG 1
31 #include <assert.h>
32 #include "system.h"
33 #include "long-options.h"
34 #include "error.h"
35 #include "xstrtod.h"
37 #ifdef HAVE_LIMITS_H
38 #include <limits.h>
39 #else
40 #ifndef UCHAR_MAX
41 #define UCHAR_MAX 255
42 #endif
43 #endif
44 #ifndef STDC_HEADERS
45 char *malloc ();
46 char *realloc ();
47 void free ();
48 #endif
50 /* Undefine, to avoid warning about redefinition on some systems. */
51 #undef min
52 #define min(a, b) ((a) < (b) ? (a) : (b))
54 #define UCHAR_LIM (UCHAR_MAX + 1)
55 #define UCHAR(c) ((unsigned char) (c))
57 #ifndef DEFAULT_TMPDIR
58 #define DEFAULT_TMPDIR "/tmp"
59 #endif
61 /* Use this as exit status in case of error, not EXIT_FAILURE. This
62 is necessary because EXIT_FAILURE is usually 1 and POSIX requires
63 that sort exit with status 1 IFF invoked with -c and the input is
64 not properly sorted. Any other irregular exit must exit with a
65 status code greater than 1. */
66 #define SORT_FAILURE 2
68 /* The kind of blanks for '-b' to skip in various options. */
69 enum blanktype { bl_start, bl_end, bl_both };
71 /* The character marking end of line. Default to \n. */
72 int eolchar = '\n';
74 /* Lines are held in core as counted strings. */
75 struct line
77 char *text; /* Text of the line. */
78 int length; /* Length not including final newline. */
79 char *keybeg; /* Start of first key. */
80 char *keylim; /* Limit of first key. */
83 /* Arrays of lines. */
84 struct lines
86 struct line *lines; /* Dynamically allocated array of lines. */
87 int used; /* Number of slots used. */
88 int alloc; /* Number of slots allocated. */
89 int limit; /* Max number of slots to allocate. */
92 /* Input buffers. */
93 struct buffer
95 char *buf; /* Dynamically allocated buffer. */
96 int used; /* Number of bytes used. */
97 int alloc; /* Number of bytes allocated. */
98 int left; /* Number of bytes left after line parsing. */
101 struct keyfield
103 int sword; /* Zero-origin 'word' to start at. */
104 int schar; /* Additional characters to skip. */
105 int skipsblanks; /* Skip leading white space at start. */
106 int eword; /* Zero-origin first word after field. */
107 int echar; /* Additional characters in field. */
108 int skipeblanks; /* Skip trailing white space at finish. */
109 int *ignore; /* Boolean array of characters to ignore. */
110 char *translate; /* Translation applied to characters. */
111 int numeric; /* Flag for numeric comparison. Handle
112 strings of digits with optional decimal
113 point, but no exponential notation. */
114 int general_numeric; /* Flag for general, numeric comparison.
115 Handle numbers in exponential notation. */
116 int month; /* Flag for comparison by month name. */
117 int reverse; /* Reverse the sense of comparison. */
118 struct keyfield *next; /* Next keyfield to try. */
121 struct month
123 char *name;
124 int val;
127 /* The name this program was run with. */
128 char *program_name;
130 /* Table of digits. */
131 static int digits[UCHAR_LIM];
133 /* Table of white space. */
134 static int blanks[UCHAR_LIM];
136 /* Table of non-printing characters. */
137 static int nonprinting[UCHAR_LIM];
139 /* Table of non-dictionary characters (not letters, digits, or blanks). */
140 static int nondictionary[UCHAR_LIM];
142 /* Translation table folding lower case to upper. */
143 static char fold_toupper[UCHAR_LIM];
145 /* Table mapping 3-letter month names to integers.
146 Alphabetic order allows binary search. */
147 static struct month const monthtab[] =
149 {"APR", 4},
150 {"AUG", 8},
151 {"DEC", 12},
152 {"FEB", 2},
153 {"JAN", 1},
154 {"JUL", 7},
155 {"JUN", 6},
156 {"MAR", 3},
157 {"MAY", 5},
158 {"NOV", 11},
159 {"OCT", 10},
160 {"SEP", 9}
163 /* During the merge phase, the number of files to merge at once. */
164 #define NMERGE 16
166 /* Initial buffer size for in core sorting. Will not grow unless a
167 line longer than this is seen. */
168 static int sortalloc = 512 * 1024;
170 /* Initial buffer size for in core merge buffers. Bear in mind that
171 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
172 static int mergealloc = 16 * 1024;
174 /* Guess of average line length. */
175 static int linelength = 30;
177 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
178 #define LINEALLOC (256 * 1024)
180 /* Prefix for temporary file names. */
181 static char *temp_file_prefix;
183 /* Flag to reverse the order of all comparisons. */
184 static int reverse;
186 /* Flag for stable sort. This turns off the last ditch bytewise
187 comparison of lines, and instead leaves lines in the same order
188 they were read if all keys compare equal. */
189 static int stable;
191 /* Tab character separating fields. If NUL, then fields are separated
192 by the empty string between a non-whitespace character and a whitespace
193 character. */
194 static char tab;
196 /* Flag to remove consecutive duplicate lines from the output.
197 Only the last of a sequence of equal lines will be output. */
198 static int unique;
200 /* Nonzero if any of the input files are the standard input. */
201 static int have_read_stdin;
203 /* Lists of key field comparisons to be tried. */
204 static struct keyfield keyhead;
206 static void
207 usage (int status)
209 if (status != 0)
210 fprintf (stderr, _("Try `%s --help' for more information.\n"),
211 program_name);
212 else
214 printf (_("\
215 Usage: %s [OPTION]... [FILE]...\n\
217 program_name);
218 printf (_("\
219 Write sorted concatenation of all FILE(s) to standard output.\n\
221 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
222 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
223 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
224 -b ignore leading blanks in sort fields or keys\n\
225 -c check if given files already sorted, do not sort\n\
226 -d consider only [a-zA-Z0-9 ] characters in keys\n\
227 -f fold lower case to upper case characters in keys\n\
228 -g compare according to general numerical value, imply -b\n\
229 -i consider only [\\040-\\0176] characters in keys\n\
230 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
231 -m merge already sorted files, do not sort\n\
232 -n compare according to string numerical value, imply -b\n\
233 -o FILE write result on FILE instead of standard output\n\
234 -r reverse the result of comparisons\n\
235 -s stabilize sort by disabling last resort comparison\n\
236 -t SEP use SEParator instead of non- to whitespace transition\n\
237 -u with -c, check for strict ordering\n\
238 -u with -m, only output the first of an equal sequence\n\
239 -z end lines with 0 byte, not newline, for find -print0\n\
240 --help display this help and exit\n\
241 --version output version information and exit\n\
243 POS is F[.C][OPTS], where F is the field number and C the character\n\
244 position in the field, both counted from zero. OPTS is made up of one\n\
245 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
246 for that key. If no key given, use the entire line as key. With no\n\
247 FILE, or when FILE is -, read standard input.\n\
249 , DEFAULT_TMPDIR);
250 puts (_("\nReport bugs to bug-gnu-utils@gnu.ai.mit.edu"));
252 /* Don't use EXIT_FAILURE here in case it is defined to be 1.
253 POSIX requires that sort return 1 IFF invoked with -c and
254 the input is not properly sorted. */
255 assert (status == 0 || status == SORT_FAILURE);
256 exit (status);
259 /* The list of temporary files. */
260 static struct tempnode
262 char *name;
263 struct tempnode *next;
264 } temphead;
266 /* Clean up any remaining temporary files. */
268 static void
269 cleanup (void)
271 struct tempnode *node;
273 for (node = temphead.next; node; node = node->next)
274 unlink (node->name);
277 /* Allocate N bytes of memory dynamically, with error checking. */
279 static char *
280 xmalloc (unsigned int n)
282 char *p;
284 p = malloc (n);
285 if (p == 0)
287 error (0, 0, _("virtual memory exhausted"));
288 cleanup ();
289 exit (SORT_FAILURE);
291 return p;
294 /* Change the size of an allocated block of memory P to N bytes,
295 with error checking.
296 If P is NULL, run xmalloc.
297 If N is 0, run free and return NULL. */
299 static char *
300 xrealloc (char *p, unsigned int n)
302 if (p == 0)
303 return xmalloc (n);
304 if (n == 0)
306 free (p);
307 return 0;
309 p = realloc (p, n);
310 if (p == 0)
312 error (0, 0, _("virtual memory exhausted"));
313 cleanup ();
314 exit (SORT_FAILURE);
316 return p;
319 static FILE *
320 xtmpfopen (const char *file)
322 FILE *fp;
323 int fd;
325 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
326 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
328 error (0, errno, "%s", file);
329 cleanup ();
330 exit (SORT_FAILURE);
333 return fp;
336 static FILE *
337 xfopen (const char *file, const char *how)
339 FILE *fp;
341 if (strcmp (file, "-") == 0)
343 fp = stdin;
345 else
347 if ((fp = fopen (file, how)) == NULL)
349 error (0, errno, "%s", file);
350 cleanup ();
351 exit (SORT_FAILURE);
355 if (fp == stdin)
356 have_read_stdin = 1;
357 return fp;
360 static void
361 xfclose (FILE *fp)
363 if (fp == stdin)
365 /* Allow reading stdin from tty more than once. */
366 if (feof (fp))
367 clearerr (fp);
369 else if (fp == stdout)
371 if (fflush (fp) != 0)
373 error (0, errno, _("flushing file"));
374 cleanup ();
375 exit (SORT_FAILURE);
378 else
380 if (fclose (fp) != 0)
382 error (0, errno, _("error closing file"));
383 cleanup ();
384 exit (SORT_FAILURE);
389 static void
390 write_bytes (const char *buf, size_t n_bytes, FILE *fp)
392 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
394 error (0, errno, _("write error"));
395 cleanup ();
396 exit (SORT_FAILURE);
400 /* Return a name for a temporary file. */
402 static char *
403 tempname (void)
405 static unsigned int seq;
406 int len = strlen (temp_file_prefix);
407 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
408 struct tempnode *node;
410 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
411 sprintf (name,
412 "%s%ssort%5.5d%5.5d",
413 temp_file_prefix,
414 (len && temp_file_prefix[len - 1] != '/') ? "/" : "",
415 (unsigned int) getpid () & 0xffff, seq);
417 /* Make sure that SEQ's value fits in 5 digits. */
418 ++seq;
419 if (seq >= 100000)
420 seq = 0;
422 node->name = name;
423 node->next = temphead.next;
424 temphead.next = node;
425 return name;
428 /* Search through the list of temporary files for NAME;
429 remove it if it is found on the list. */
431 static void
432 zaptemp (char *name)
434 struct tempnode *node, *temp;
436 for (node = &temphead; node->next; node = node->next)
437 if (!strcmp (name, node->next->name))
438 break;
439 if (node->next)
441 temp = node->next;
442 unlink (temp->name);
443 free (temp->name);
444 node->next = temp->next;
445 free ((char *) temp);
449 /* Initialize the character class tables. */
451 static void
452 inittables (void)
454 int i;
456 for (i = 0; i < UCHAR_LIM; ++i)
458 if (ISBLANK (i))
459 blanks[i] = 1;
460 if (ISDIGIT (i))
461 digits[i] = 1;
462 if (!ISPRINT (i))
463 nonprinting[i] = 1;
464 if (!ISALNUM (i) && !ISBLANK (i))
465 nondictionary[i] = 1;
466 if (ISLOWER (i))
467 fold_toupper[i] = toupper (i);
468 else
469 fold_toupper[i] = i;
473 /* Initialize BUF, allocating ALLOC bytes initially. */
475 static void
476 initbuf (struct buffer *buf, int alloc)
478 buf->alloc = alloc;
479 buf->buf = xmalloc (buf->alloc);
480 buf->used = buf->left = 0;
483 /* Fill BUF reading from FP, moving buf->left bytes from the end
484 of buf->buf to the beginning first. If EOF is reached and the
485 file wasn't terminated by a newline, supply one. Return a count
486 of bytes buffered. */
488 static int
489 fillbuf (struct buffer *buf, FILE *fp)
491 int cc;
493 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
494 buf->used = buf->left;
496 while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, eolchar, buf->used)))
498 if (buf->used == buf->alloc)
500 buf->alloc *= 2;
501 buf->buf = xrealloc (buf->buf, buf->alloc);
503 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
504 if (ferror (fp))
506 error (0, errno, _("read error"));
507 cleanup ();
508 exit (SORT_FAILURE);
510 buf->used += cc;
513 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
515 if (buf->used == buf->alloc)
517 buf->alloc *= 2;
518 buf->buf = xrealloc (buf->buf, buf->alloc);
520 buf->buf[buf->used++] = eolchar;
523 return buf->used;
526 /* Initialize LINES, allocating space for ALLOC lines initially.
527 LIMIT is the maximum possible number of lines to allocate space
528 for, ever. */
530 static void
531 initlines (struct lines *lines, int alloc, int limit)
533 lines->alloc = alloc;
534 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
535 lines->used = 0;
536 lines->limit = limit;
539 /* Return a pointer to the first character of the field specified
540 by KEY in LINE. */
542 static char *
543 begfield (const struct line *line, const struct keyfield *key)
545 register char *ptr = line->text, *lim = ptr + line->length;
546 register int sword = key->sword, schar = key->schar;
548 if (tab)
549 while (ptr < lim && sword--)
551 while (ptr < lim && *ptr != tab)
552 ++ptr;
553 if (ptr < lim)
554 ++ptr;
556 else
557 while (ptr < lim && sword--)
559 while (ptr < lim && blanks[UCHAR (*ptr)])
560 ++ptr;
561 while (ptr < lim && !blanks[UCHAR (*ptr)])
562 ++ptr;
565 if (key->skipsblanks)
566 while (ptr < lim && blanks[UCHAR (*ptr)])
567 ++ptr;
569 if (ptr + schar <= lim)
570 ptr += schar;
571 else
572 ptr = lim;
574 return ptr;
577 /* Return the limit of (a pointer to the first character after) the field
578 in LINE specified by KEY. */
580 static char *
581 limfield (const struct line *line, const struct keyfield *key)
583 register char *ptr = line->text, *lim = ptr + line->length;
584 register int eword = key->eword, echar = key->echar;
586 /* Note: from the POSIX spec:
587 The leading field separator itself is included in
588 a field when -t is not used. FIXME: move this comment up... */
590 /* Move PTR past EWORD fields or to one past the last byte on LINE,
591 whichever comes first. If there are more than EWORD fields, leave
592 PTR pointing at the beginning of the field having zero-based index,
593 EWORD. If a delimiter character was specified (via -t), then that
594 `beginning' is the first character following the delimiting TAB.
595 Otherwise, leave PTR pointing at the first `blank' character after
596 the preceding field. */
597 if (tab)
598 while (ptr < lim && eword--)
600 while (ptr < lim && *ptr != tab)
601 ++ptr;
602 if (ptr < lim && (eword || echar > 0))
603 ++ptr;
605 else
606 while (ptr < lim && eword--)
608 while (ptr < lim && blanks[UCHAR (*ptr)])
609 ++ptr;
610 while (ptr < lim && !blanks[UCHAR (*ptr)])
611 ++ptr;
614 #ifdef POSIX_UNSPECIFIED
615 /* The following block of code makes GNU sort incompatible with
616 standard Unix sort, so it's ifdef'd out for now.
617 The POSIX spec isn't clear on how to interpret this.
618 FIXME: request clarification.
620 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
621 Date: Thu, 30 May 96 12:20:41 -0400
623 [...]I believe I've found another bug in `sort'.
625 $ cat /tmp/sort.in
626 a b c 2 d
627 pq rs 1 t
628 $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
629 a b c 2 d
630 pq rs 1 t
631 $ /bin/sort +0.6 -0.7 </tmp/sort.in
632 pq rs 1 t
633 a b c 2 d
635 Unix sort produced the answer I expected: sort on the single character
636 in column 6. GNU sort produced different results, because it disagrees
637 on the interpretation of the key-end spec "-M.N". Unix sort reads this
638 as "skip M fields, then N characters"; but GNU sort wants it to mean
639 "skip M fields, then either N characters or the rest of the current
640 field, whichever comes first". This extra clause applies only to
641 key-ends, not key-starts.
644 /* Make LIM point to the end of (one byte past) the current field. */
645 if (tab)
647 char *newlim;
648 newlim = memchr (ptr, tab, lim - ptr);
649 if (newlim)
650 lim = newlim;
652 else
654 char *newlim;
655 newlim = ptr;
656 while (newlim < lim && blanks[UCHAR (*newlim)])
657 ++newlim;
658 while (newlim < lim && !blanks[UCHAR (*newlim)])
659 ++newlim;
660 lim = newlim;
662 #endif
664 /* If we're skipping leading blanks, don't start counting characters
665 until after skipping past any leading blanks. */
666 if (key->skipsblanks)
667 while (ptr < lim && blanks[UCHAR (*ptr)])
668 ++ptr;
670 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
671 if (ptr + echar <= lim)
672 ptr += echar;
673 else
674 ptr = lim;
676 return ptr;
679 /* FIXME */
681 void
682 trim_trailing_blanks (const char *a_start, char **a_end)
684 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
685 --(*a_end);
688 /* Find the lines in BUF, storing pointers and lengths in LINES.
689 Also replace newlines in BUF with NULs. */
691 static void
692 findlines (struct buffer *buf, struct lines *lines)
694 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
695 struct keyfield *key = keyhead.next;
697 lines->used = 0;
699 while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))
700 && lines->used < lines->limit)
702 /* There are various places in the code that rely on a NUL
703 being at the end of in-core lines; NULs inside the lines
704 will not cause trouble, though. */
705 *ptr = '\0';
707 if (lines->used == lines->alloc)
709 lines->alloc *= 2;
710 lines->lines = (struct line *)
711 xrealloc ((char *) lines->lines,
712 lines->alloc * sizeof (struct line));
715 lines->lines[lines->used].text = beg;
716 lines->lines[lines->used].length = ptr - beg;
718 /* Precompute the position of the first key for efficiency. */
719 if (key)
721 if (key->eword >= 0)
722 lines->lines[lines->used].keylim =
723 limfield (&lines->lines[lines->used], key);
724 else
725 lines->lines[lines->used].keylim = ptr;
727 if (key->sword >= 0)
728 lines->lines[lines->used].keybeg =
729 begfield (&lines->lines[lines->used], key);
730 else
732 if (key->skipsblanks)
733 while (blanks[UCHAR (*beg)])
734 ++beg;
735 lines->lines[lines->used].keybeg = beg;
737 if (key->skipeblanks)
739 trim_trailing_blanks (lines->lines[lines->used].keybeg,
740 &lines->lines[lines->used].keylim);
743 else
745 lines->lines[lines->used].keybeg = 0;
746 lines->lines[lines->used].keylim = 0;
749 ++lines->used;
750 beg = ptr + 1;
753 buf->left = lim - beg;
756 /* Compare strings A and B containing decimal fractions < 1. Each string
757 should begin with a decimal point followed immediately by the digits
758 of the fraction. Strings not of this form are considered to be zero. */
760 static int
761 fraccompare (register const char *a, register const char *b)
763 register tmpa = UCHAR (*a), tmpb = UCHAR (*b);
765 if (tmpa == '.' && tmpb == '.')
768 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
769 while (tmpa == tmpb && digits[tmpa]);
770 if (digits[tmpa] && digits[tmpb])
771 return tmpa - tmpb;
772 if (digits[tmpa])
774 while (tmpa == '0')
775 tmpa = UCHAR (*++a);
776 if (digits[tmpa])
777 return 1;
778 return 0;
780 if (digits[tmpb])
782 while (tmpb == '0')
783 tmpb = UCHAR (*++b);
784 if (digits[tmpb])
785 return -1;
786 return 0;
788 return 0;
790 else if (tmpa == '.')
793 tmpa = UCHAR (*++a);
794 while (tmpa == '0');
795 if (digits[tmpa])
796 return 1;
797 return 0;
799 else if (tmpb == '.')
802 tmpb = UCHAR (*++b);
803 while (tmpb == '0');
804 if (digits[tmpb])
805 return -1;
806 return 0;
808 return 0;
811 /* Compare strings A and B as numbers without explicitly converting them to
812 machine numbers. Comparatively slow for short strings, but asymptotically
813 hideously fast. */
815 static int
816 numcompare (register const char *a, register const char *b)
818 register int tmpa, tmpb, loga, logb, tmp;
820 tmpa = UCHAR (*a);
821 tmpb = UCHAR (*b);
823 while (blanks[tmpa])
824 tmpa = UCHAR (*++a);
825 while (blanks[tmpb])
826 tmpb = UCHAR (*++b);
828 if (tmpa == '-')
831 tmpa = UCHAR (*++a);
832 while (tmpa == '0');
833 if (tmpb != '-')
835 if (tmpa == '.')
837 tmpa = UCHAR (*++a);
838 while (tmpa == '0');
839 if (digits[tmpa])
840 return -1;
841 while (tmpb == '0')
842 tmpb = UCHAR (*++b);
843 if (tmpb == '.')
845 tmpb = *++b;
846 while (tmpb == '0');
847 if (digits[tmpb])
848 return -1;
849 return 0;
852 tmpb = UCHAR (*++b);
853 while (tmpb == '0');
855 while (tmpa == tmpb && digits[tmpa])
856 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
858 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
859 return -fraccompare (a, b);
861 if (digits[tmpa])
862 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
864 else
865 loga = 0;
867 if (digits[tmpb])
868 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
870 else
871 logb = 0;
873 if ((tmp = logb - loga) != 0)
874 return tmp;
876 if (!loga)
877 return 0;
879 return tmpb - tmpa;
881 else if (tmpb == '-')
884 tmpb = UCHAR (*++b);
885 while (tmpb == '0');
886 if (tmpb == '.')
888 tmpb = *++b;
889 while (tmpb == '0');
890 if (digits[tmpb])
891 return 1;
892 while (tmpa == '0')
893 tmpa = UCHAR (*++a);
894 if (tmpa == '.')
896 tmpa = UCHAR (*++a);
897 while (tmpa == '0');
898 if (digits[tmpa])
899 return 1;
900 return 0;
902 else
904 while (tmpa == '0')
905 tmpa = UCHAR (*++a);
906 while (tmpb == '0')
907 tmpb = UCHAR (*++b);
909 while (tmpa == tmpb && digits[tmpa])
910 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
912 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
913 return fraccompare (a, b);
915 if (digits[tmpa])
916 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
918 else
919 loga = 0;
921 if (digits[tmpb])
922 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
924 else
925 logb = 0;
927 if ((tmp = loga - logb) != 0)
928 return tmp;
930 if (!loga)
931 return 0;
933 return tmpa - tmpb;
937 static int
938 general_numcompare (const char *sa, const char *sb)
940 double a, b;
941 /* FIXME: add option to warn about failed conversions. */
942 /* FIXME: maybe add option to try expensive FP conversion
943 only if A and B can't be compared more cheaply/accurately. */
944 if (xstrtod (sa, NULL, &a))
946 a = 0;
948 if (xstrtod (sb, NULL, &b))
950 b = 0;
952 return a == b ? 0 : a < b ? -1 : 1;
955 /* Return an integer <= 12 associated with month name S with length LEN,
956 0 if the name in S is not recognized. */
958 static int
959 getmonth (const char *s, int len)
961 char month[4];
962 register int i, lo = 0, hi = 12;
964 while (len > 0 && blanks[UCHAR(*s)])
965 ++s, --len;
967 if (len < 3)
968 return 0;
970 for (i = 0; i < 3; ++i)
971 month[i] = fold_toupper[UCHAR (s[i])];
972 month[3] = '\0';
974 while (hi - lo > 1)
975 if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
976 hi = (lo + hi) / 2;
977 else
978 lo = (lo + hi) / 2;
979 if (!strcmp (month, monthtab[lo].name))
980 return monthtab[lo].val;
981 return 0;
984 /* Compare two lines A and B trying every key in sequence until there
985 are no more keys or a difference is found. */
987 static int
988 keycompare (const struct line *a, const struct line *b)
990 register char *texta, *textb, *lima, *limb;
991 register unsigned char *translate;
992 register int *ignore;
993 struct keyfield *key;
994 int diff = 0, iter = 0, lena, lenb;
996 for (key = keyhead.next; key; key = key->next, ++iter)
998 ignore = key->ignore;
999 translate = (unsigned char *) key->translate;
1001 /* Find the beginning and limit of each field. */
1002 if (iter || a->keybeg == NULL || b->keybeg == NULL)
1004 if (key->eword >= 0)
1005 lima = limfield (a, key), limb = limfield (b, key);
1006 else
1007 lima = a->text + a->length, limb = b->text + b->length;
1009 if (key->sword >= 0)
1010 texta = begfield (a, key), textb = begfield (b, key);
1011 else
1013 texta = a->text, textb = b->text;
1014 if (key->skipsblanks)
1016 while (texta < lima && blanks[UCHAR (*texta)])
1017 ++texta;
1018 while (textb < limb && blanks[UCHAR (*textb)])
1019 ++textb;
1023 else
1025 /* For the first iteration only, the key positions have
1026 been precomputed for us. */
1027 texta = a->keybeg, lima = a->keylim;
1028 textb = b->keybeg, limb = b->keylim;
1031 /* Find the lengths. */
1032 lena = lima - texta, lenb = limb - textb;
1033 if (lena < 0)
1034 lena = 0;
1035 if (lenb < 0)
1036 lenb = 0;
1038 if (key->skipeblanks)
1040 char *a_end = texta + lena;
1041 char *b_end = textb + lenb;
1042 trim_trailing_blanks (texta, &a_end);
1043 trim_trailing_blanks (textb, &b_end);
1044 lena = a_end - texta;
1045 lenb = b_end - textb;
1048 /* Actually compare the fields. */
1049 if (key->numeric)
1051 if (*lima || *limb)
1053 char savea = *lima, saveb = *limb;
1055 *lima = *limb = '\0';
1056 diff = numcompare (texta, textb);
1057 *lima = savea, *limb = saveb;
1059 else
1060 diff = numcompare (texta, textb);
1062 if (diff)
1063 return key->reverse ? -diff : diff;
1064 continue;
1066 else if (key->general_numeric)
1068 if (*lima || *limb)
1070 char savea = *lima, saveb = *limb;
1072 *lima = *limb = '\0';
1073 diff = general_numcompare (texta, textb);
1074 *lima = savea, *limb = saveb;
1076 else
1077 diff = general_numcompare (texta, textb);
1079 if (diff)
1080 return key->reverse ? -diff : diff;
1081 continue;
1083 else if (key->month)
1085 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1086 if (diff)
1087 return key->reverse ? -diff : diff;
1088 continue;
1090 else if (ignore && translate)
1092 #define CMP_WITH_IGNORE(A, B) \
1093 do \
1095 while (texta < lima && textb < limb) \
1097 while (texta < lima && ignore[UCHAR (*texta)]) \
1098 ++texta; \
1099 while (textb < limb && ignore[UCHAR (*textb)]) \
1100 ++textb; \
1101 if (texta < lima && textb < limb) \
1103 if ((A) != (B)) \
1105 diff = (A) - (B); \
1106 break; \
1108 ++texta; \
1109 ++textb; \
1112 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1113 diff = -1; \
1114 else if (texta < lima && textb == limb \
1115 && !ignore[UCHAR (*texta)]) \
1116 diff = 1; \
1119 if (diff == 0) \
1121 while (texta < lima && ignore[UCHAR (*texta)]) \
1122 ++texta; \
1123 while (textb < limb && ignore[UCHAR (*textb)]) \
1124 ++textb; \
1126 if (texta == lima && textb < limb) \
1127 diff = -1; \
1128 else if (texta < lima && textb == limb) \
1129 diff = 1; \
1131 /* Relative lengths are meaningless if characters were ignored. \
1132 Handling this case here avoids what might be an invalid length \
1133 comparison below. */ \
1134 if (diff == 0 && texta == lima && textb == limb) \
1135 return 0; \
1137 while (0)
1139 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1140 else if (ignore)
1141 CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1142 else if (translate)
1143 while (texta < lima && textb < limb)
1145 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1147 diff = (translate[UCHAR (*--texta)]
1148 - translate[UCHAR (*--textb)]);
1149 break;
1152 else
1153 diff = memcmp (texta, textb, min (lena, lenb));
1155 if (diff)
1156 return key->reverse ? -diff : diff;
1157 if ((diff = lena - lenb) != 0)
1158 return key->reverse ? -diff : diff;
1161 return 0;
1164 /* Compare two lines A and B, returning negative, zero, or positive
1165 depending on whether A compares less than, equal to, or greater than B. */
1167 static int
1168 compare (register const struct line *a, register const struct line *b)
1170 int diff, tmpa, tmpb, mini;
1172 /* First try to compare on the specified keys (if any).
1173 The only two cases with no key at all are unadorned sort,
1174 and unadorned sort -r. */
1175 if (keyhead.next)
1177 diff = keycompare (a, b);
1178 if (diff != 0)
1179 return diff;
1180 if (unique || stable)
1181 return 0;
1184 /* If the keys all compare equal (or no keys were specified)
1185 fall through to the default byte-by-byte comparison. */
1186 tmpa = a->length, tmpb = b->length;
1187 mini = min (tmpa, tmpb);
1188 if (mini == 0)
1189 diff = tmpa - tmpb;
1190 else
1192 char *ap = a->text, *bp = b->text;
1194 diff = UCHAR (*ap) - UCHAR (*bp);
1195 if (diff == 0)
1197 diff = memcmp (ap, bp, mini);
1198 if (diff == 0)
1199 diff = tmpa - tmpb;
1203 return reverse ? -diff : diff;
1206 /* Check that the lines read from the given FP come in order. Return
1207 1 if they do and 0 if there is a disorder.
1208 FIXME: return number of first out-of-order line if not sorted. */
1210 static int
1211 checkfp (FILE *fp)
1213 struct buffer buf; /* Input buffer. */
1214 struct lines lines; /* Lines scanned from the buffer. */
1215 struct line temp; /* Copy of previous line. */
1216 int cc; /* Character count. */
1217 int alloc, sorted = 1;
1219 initbuf (&buf, mergealloc);
1220 initlines (&lines, mergealloc / linelength + 1,
1221 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1222 alloc = linelength;
1223 temp.text = xmalloc (alloc);
1225 cc = fillbuf (&buf, fp);
1226 if (cc == 0)
1227 goto finish;
1229 findlines (&buf, &lines);
1231 while (1)
1233 struct line *prev_line; /* Pointer to previous line. */
1234 int cmp; /* Result of calling compare. */
1235 int i;
1237 /* Compare each line in the buffer with its successor. */
1238 for (i = 0; i < lines.used - 1; ++i)
1240 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1241 if ((unique && cmp >= 0) || (cmp > 0))
1243 sorted = 0;
1244 goto finish;
1248 /* Save the last line of the buffer and refill the buffer. */
1249 prev_line = lines.lines + (lines.used - 1);
1250 if (prev_line->length > alloc)
1252 while (prev_line->length + 1 > alloc)
1253 alloc *= 2;
1254 temp.text = xrealloc (temp.text, alloc);
1256 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1257 temp.length = prev_line->length;
1258 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1259 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1261 cc = fillbuf (&buf, fp);
1262 if (cc == 0)
1263 break;
1265 findlines (&buf, &lines);
1266 /* Make sure the line saved from the old buffer contents is
1267 less than or equal to the first line of the new buffer. */
1268 cmp = compare (&temp, &lines.lines[0]);
1269 if ((unique && cmp >= 0) || (cmp > 0))
1271 sorted = 0;
1272 break;
1276 finish:
1277 xfclose (fp);
1278 free (buf.buf);
1279 free ((char *) lines.lines);
1280 free (temp.text);
1281 return sorted;
1284 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1285 Close FPS before returning. */
1287 static void
1288 mergefps (FILE **fps, register int nfps, FILE *ofp)
1290 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1291 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1292 struct line saved; /* Saved line for unique check. */
1293 int savedflag = 0; /* True if there is a saved line. */
1294 int savealloc; /* Size allocated for the saved line. */
1295 int cur[NMERGE]; /* Current line in each line table. */
1296 int ord[NMERGE]; /* Table representing a permutation of fps,
1297 such that lines[ord[0]].lines[cur[ord[0]]]
1298 is the smallest line and will be next
1299 output. */
1300 register int i, j, t;
1302 #ifdef lint /* Suppress `used before initialized' warning. */
1303 savealloc = 0;
1304 #endif
1306 /* Allocate space for a saved line if necessary. */
1307 if (unique)
1309 savealloc = linelength;
1310 saved.text = xmalloc (savealloc);
1313 /* Read initial lines from each input file. */
1314 for (i = 0; i < nfps; ++i)
1316 initbuf (&buffer[i], mergealloc);
1317 /* If a file is empty, eliminate it from future consideration. */
1318 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1320 xfclose (fps[i]);
1321 --nfps;
1322 for (j = i; j < nfps; ++j)
1323 fps[j] = fps[j + 1];
1325 if (i == nfps)
1326 free (buffer[i].buf);
1327 else
1329 initlines (&lines[i], mergealloc / linelength + 1,
1330 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1331 findlines (&buffer[i], &lines[i]);
1332 cur[i] = 0;
1336 /* Set up the ord table according to comparisons among input lines.
1337 Since this only reorders two items if one is strictly greater than
1338 the other, it is stable. */
1339 for (i = 0; i < nfps; ++i)
1340 ord[i] = i;
1341 for (i = 1; i < nfps; ++i)
1342 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1343 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1344 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1346 /* Repeatedly output the smallest line until no input remains. */
1347 while (nfps)
1349 /* If uniqified output is turned on, output only the first of
1350 an identical series of lines. */
1351 if (unique)
1353 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1355 write_bytes (saved.text, saved.length, ofp);
1356 putc (eolchar, ofp);
1357 savedflag = 0;
1359 if (!savedflag)
1361 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1363 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1364 savealloc *= 2;
1365 saved.text = xrealloc (saved.text, savealloc);
1367 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1368 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1369 saved.length + 1);
1370 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1372 saved.keybeg = saved.text +
1373 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1374 - lines[ord[0]].lines[cur[ord[0]]].text);
1376 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1378 saved.keylim = saved.text +
1379 (lines[ord[0]].lines[cur[ord[0]]].keylim
1380 - lines[ord[0]].lines[cur[ord[0]]].text);
1382 savedflag = 1;
1385 else
1387 write_bytes (lines[ord[0]].lines[cur[ord[0]]].text,
1388 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1389 putc (eolchar, ofp);
1392 /* Check if we need to read more lines into core. */
1393 if (++cur[ord[0]] == lines[ord[0]].used)
1394 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1396 findlines (&buffer[ord[0]], &lines[ord[0]]);
1397 cur[ord[0]] = 0;
1399 else
1401 /* We reached EOF on fps[ord[0]]. */
1402 for (i = 1; i < nfps; ++i)
1403 if (ord[i] > ord[0])
1404 --ord[i];
1405 --nfps;
1406 xfclose (fps[ord[0]]);
1407 free (buffer[ord[0]].buf);
1408 free ((char *) lines[ord[0]].lines);
1409 for (i = ord[0]; i < nfps; ++i)
1411 fps[i] = fps[i + 1];
1412 buffer[i] = buffer[i + 1];
1413 lines[i] = lines[i + 1];
1414 cur[i] = cur[i + 1];
1416 for (i = 0; i < nfps; ++i)
1417 ord[i] = ord[i + 1];
1418 continue;
1421 /* The new line just read in may be larger than other lines
1422 already in core; push it back in the queue until we encounter
1423 a line larger than it. */
1424 for (i = 1; i < nfps; ++i)
1426 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1427 &lines[ord[i]].lines[cur[ord[i]]]);
1428 if (!t)
1429 t = ord[0] - ord[i];
1430 if (t < 0)
1431 break;
1433 t = ord[0];
1434 for (j = 1; j < i; ++j)
1435 ord[j - 1] = ord[j];
1436 ord[i - 1] = t;
1439 if (unique && savedflag)
1441 write_bytes (saved.text, saved.length, ofp);
1442 putc (eolchar, ofp);
1443 free (saved.text);
1447 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1449 static void
1450 sortlines (struct line *lines, int nlines, struct line *temp)
1452 register struct line *lo, *hi, *t;
1453 register int nlo, nhi;
1455 if (nlines == 2)
1457 if (compare (&lines[0], &lines[1]) > 0)
1458 *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1459 return;
1462 nlo = nlines / 2;
1463 lo = lines;
1464 nhi = nlines - nlo;
1465 hi = lines + nlo;
1467 if (nlo > 1)
1468 sortlines (lo, nlo, temp);
1470 if (nhi > 1)
1471 sortlines (hi, nhi, temp);
1473 t = temp;
1475 while (nlo && nhi)
1476 if (compare (lo, hi) <= 0)
1477 *t++ = *lo++, --nlo;
1478 else
1479 *t++ = *hi++, --nhi;
1480 while (nlo--)
1481 *t++ = *lo++;
1483 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1484 *lo++ = *t++;
1487 /* Check that each of the NFILES FILES is ordered.
1488 Return a count of disordered files. */
1490 static int
1491 check (char **files, int nfiles)
1493 int i, disorders = 0;
1494 FILE *fp;
1496 for (i = 0; i < nfiles; ++i)
1498 fp = xfopen (files[i], "r");
1499 if (!checkfp (fp))
1501 fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]);
1502 ++disorders;
1505 return disorders;
1508 /* Merge NFILES FILES onto OFP. */
1510 static void
1511 merge (char **files, int nfiles, FILE *ofp)
1513 int i, j, t;
1514 char *temp;
1515 FILE *fps[NMERGE], *tfp;
1517 while (nfiles > NMERGE)
1519 t = 0;
1520 for (i = 0; i < nfiles / NMERGE; ++i)
1522 for (j = 0; j < NMERGE; ++j)
1523 fps[j] = xfopen (files[i * NMERGE + j], "r");
1524 tfp = xtmpfopen (temp = tempname ());
1525 mergefps (fps, NMERGE, tfp);
1526 xfclose (tfp);
1527 for (j = 0; j < NMERGE; ++j)
1528 zaptemp (files[i * NMERGE + j]);
1529 files[t++] = temp;
1531 for (j = 0; j < nfiles % NMERGE; ++j)
1532 fps[j] = xfopen (files[i * NMERGE + j], "r");
1533 tfp = xtmpfopen (temp = tempname ());
1534 mergefps (fps, nfiles % NMERGE, tfp);
1535 xfclose (tfp);
1536 for (j = 0; j < nfiles % NMERGE; ++j)
1537 zaptemp (files[i * NMERGE + j]);
1538 files[t++] = temp;
1539 nfiles = t;
1542 for (i = 0; i < nfiles; ++i)
1543 fps[i] = xfopen (files[i], "r");
1544 mergefps (fps, i, ofp);
1545 for (i = 0; i < nfiles; ++i)
1546 zaptemp (files[i]);
1549 /* Sort NFILES FILES onto OFP. */
1551 static void
1552 sort (char **files, int nfiles, FILE *ofp)
1554 struct buffer buf;
1555 struct lines lines;
1556 struct line *tmp;
1557 int i, ntmp;
1558 FILE *fp, *tfp;
1559 struct tempnode *node;
1560 int n_temp_files = 0;
1561 char **tempfiles;
1563 initbuf (&buf, sortalloc);
1564 initlines (&lines, sortalloc / linelength + 1,
1565 LINEALLOC / sizeof (struct line));
1566 ntmp = lines.alloc;
1567 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1569 while (nfiles--)
1571 fp = xfopen (*files++, "r");
1572 while (fillbuf (&buf, fp))
1574 findlines (&buf, &lines);
1575 if (lines.used > ntmp)
1577 while (lines.used > ntmp)
1578 ntmp *= 2;
1579 tmp = (struct line *)
1580 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1582 sortlines (lines.lines, lines.used, tmp);
1583 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1584 tfp = ofp;
1585 else
1587 ++n_temp_files;
1588 tfp = xtmpfopen (tempname ());
1590 for (i = 0; i < lines.used; ++i)
1591 if (!unique || i == 0
1592 || compare (&lines.lines[i], &lines.lines[i - 1]))
1594 write_bytes (lines.lines[i].text, lines.lines[i].length, tfp);
1595 putc (eolchar, tfp);
1597 if (tfp != ofp)
1598 xfclose (tfp);
1600 xfclose (fp);
1603 free (buf.buf);
1604 free ((char *) lines.lines);
1605 free ((char *) tmp);
1607 if (n_temp_files)
1609 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1610 i = n_temp_files;
1611 for (node = temphead.next; i > 0; node = node->next)
1612 tempfiles[--i] = node->name;
1613 merge (tempfiles, n_temp_files, ofp);
1614 free ((char *) tempfiles);
1618 /* Insert key KEY at the end of the list (`keyhead'). */
1620 static void
1621 insertkey (struct keyfield *key)
1623 struct keyfield *k = &keyhead;
1625 while (k->next)
1626 k = k->next;
1627 k->next = key;
1628 key->next = NULL;
1631 static void
1632 badfieldspec (const char *s)
1634 error (SORT_FAILURE, 0, _("invalid field specification `%s'"), s);
1637 /* Handle interrupts and hangups. */
1639 static void
1640 sighandler (int sig)
1642 #ifdef SA_INTERRUPT
1643 struct sigaction sigact;
1645 sigact.sa_handler = SIG_DFL;
1646 sigemptyset (&sigact.sa_mask);
1647 sigact.sa_flags = 0;
1648 sigaction (sig, &sigact, NULL);
1649 #else /* !SA_INTERRUPT */
1650 signal (sig, SIG_DFL);
1651 #endif /* SA_INTERRUPT */
1652 cleanup ();
1653 kill (getpid (), sig);
1656 /* Set the ordering options for KEY specified in S.
1657 Return the address of the first character in S that
1658 is not a valid ordering option.
1659 BLANKTYPE is the kind of blanks that 'b' should skip. */
1661 static char *
1662 set_ordering (register const char *s, struct keyfield *key,
1663 enum blanktype blanktype)
1665 while (*s)
1667 switch (*s)
1669 case 'b':
1670 if (blanktype == bl_start || blanktype == bl_both)
1671 key->skipsblanks = 1;
1672 if (blanktype == bl_end || blanktype == bl_both)
1673 key->skipeblanks = 1;
1674 break;
1675 case 'd':
1676 key->ignore = nondictionary;
1677 break;
1678 case 'f':
1679 key->translate = fold_toupper;
1680 break;
1681 case 'g':
1682 key->general_numeric = 1;
1683 break;
1684 case 'i':
1685 key->ignore = nonprinting;
1686 break;
1687 case 'M':
1688 key->month = 1;
1689 break;
1690 case 'n':
1691 key->numeric = 1;
1692 if (blanktype == bl_start || blanktype == bl_both)
1693 key->skipsblanks = 1;
1694 if (blanktype == bl_end || blanktype == bl_both)
1695 key->skipeblanks = 1;
1696 break;
1697 case 'r':
1698 key->reverse = 1;
1699 break;
1700 default:
1701 return (char *) s;
1703 ++s;
1705 return (char *) s;
1709 main (int argc, char **argv)
1711 struct keyfield *key = NULL, gkey;
1712 char *s;
1713 int i, t, t2;
1714 int checkonly = 0, mergeonly = 0, nfiles = 0;
1715 char *minus = "-", *outfile = minus, **files, *tmp;
1716 FILE *ofp;
1717 #ifdef SA_INTERRUPT
1718 struct sigaction oldact, newact;
1719 #endif /* SA_INTERRUPT */
1721 program_name = argv[0];
1722 setlocale (LC_ALL, "");
1723 bindtextdomain (PACKAGE, LOCALEDIR);
1724 textdomain (PACKAGE);
1726 parse_long_options (argc, argv, "sort", PACKAGE_VERSION, usage);
1728 have_read_stdin = 0;
1729 inittables ();
1731 temp_file_prefix = getenv ("TMPDIR");
1732 if (temp_file_prefix == NULL)
1733 temp_file_prefix = DEFAULT_TMPDIR;
1735 #ifdef SA_INTERRUPT
1736 newact.sa_handler = sighandler;
1737 sigemptyset (&newact.sa_mask);
1738 newact.sa_flags = 0;
1740 sigaction (SIGINT, NULL, &oldact);
1741 if (oldact.sa_handler != SIG_IGN)
1742 sigaction (SIGINT, &newact, NULL);
1743 sigaction (SIGHUP, NULL, &oldact);
1744 if (oldact.sa_handler != SIG_IGN)
1745 sigaction (SIGHUP, &newact, NULL);
1746 sigaction (SIGPIPE, NULL, &oldact);
1747 if (oldact.sa_handler != SIG_IGN)
1748 sigaction (SIGPIPE, &newact, NULL);
1749 sigaction (SIGTERM, NULL, &oldact);
1750 if (oldact.sa_handler != SIG_IGN)
1751 sigaction (SIGTERM, &newact, NULL);
1752 #else /* !SA_INTERRUPT */
1753 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1754 signal (SIGINT, sighandler);
1755 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1756 signal (SIGHUP, sighandler);
1757 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1758 signal (SIGPIPE, sighandler);
1759 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1760 signal (SIGTERM, sighandler);
1761 #endif /* !SA_INTERRUPT */
1763 gkey.sword = gkey.eword = -1;
1764 gkey.ignore = NULL;
1765 gkey.translate = NULL;
1766 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1767 gkey.skipsblanks = gkey.skipeblanks = 0;
1769 files = (char **) xmalloc (sizeof (char *) * argc);
1771 for (i = 1; i < argc; ++i)
1773 if (argv[i][0] == '+')
1775 if (key)
1776 insertkey (key);
1777 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1778 key->eword = -1;
1779 key->ignore = NULL;
1780 key->translate = NULL;
1781 key->skipsblanks = key->skipeblanks = 0;
1782 key->numeric = key->general_numeric = key->month = key->reverse = 0;
1783 s = argv[i] + 1;
1784 if (! (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])])))
1785 badfieldspec (argv[i]);
1786 for (t = 0; digits[UCHAR (*s)]; ++s)
1787 t = 10 * t + *s - '0';
1788 t2 = 0;
1789 if (*s == '.')
1790 for (++s; digits[UCHAR (*s)]; ++s)
1791 t2 = 10 * t2 + *s - '0';
1792 if (t2 || t)
1794 key->sword = t;
1795 key->schar = t2;
1797 else
1798 key->sword = -1;
1799 s = set_ordering (s, key, bl_start);
1800 if (*s)
1801 badfieldspec (argv[i]);
1803 else if (argv[i][0] == '-' && argv[i][1])
1805 s = argv[i] + 1;
1806 if (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])]))
1808 if (!key)
1810 /* Provoke with `sort -9'. */
1811 error (0, 0, _("when using the old-style +POS and -POS \
1812 key specifiers,\nthe +POS specifier must come first"));
1813 usage (SORT_FAILURE);
1815 for (t = 0; digits[UCHAR (*s)]; ++s)
1816 t = t * 10 + *s - '0';
1817 t2 = 0;
1818 if (*s == '.')
1819 for (++s; digits[UCHAR (*s)]; ++s)
1820 t2 = t2 * 10 + *s - '0';
1821 key->eword = t;
1822 key->echar = t2;
1823 s = set_ordering (s, key, bl_end);
1824 if (*s)
1825 badfieldspec (argv[i]);
1826 insertkey (key);
1827 key = NULL;
1829 else
1830 while (*s)
1832 s = set_ordering (s, &gkey, bl_both);
1833 switch (*s)
1835 case '\0':
1836 break;
1837 case 'c':
1838 checkonly = 1;
1839 break;
1840 case 'k':
1841 if (s[1])
1842 ++s;
1843 else
1845 if (i == argc - 1)
1846 error (SORT_FAILURE, 0,
1847 _("option `-k' requires an argument"));
1848 else
1849 s = argv[++i];
1851 if (key)
1852 insertkey (key);
1853 key = (struct keyfield *)
1854 xmalloc (sizeof (struct keyfield));
1855 key->eword = -1;
1856 key->ignore = NULL;
1857 key->translate = NULL;
1858 key->skipsblanks = key->skipeblanks = 0;
1859 key->numeric = key->month = key->reverse = 0;
1860 /* Get POS1. */
1861 if (!digits[UCHAR (*s)])
1862 badfieldspec (argv[i]);
1863 for (t = 0; digits[UCHAR (*s)]; ++s)
1864 t = 10 * t + *s - '0';
1865 if (t == 0)
1867 /* Provoke with `sort -k0' */
1868 error (0, 0, _("the starting field number argument \
1869 to the `-k' option must be positive"));
1870 badfieldspec (argv[i]);
1872 --t;
1873 t2 = 0;
1874 if (*s == '.')
1876 if (!digits[UCHAR (s[1])])
1878 /* Provoke with `sort -k1.' */
1879 error (0, 0, _("starting field spec has `.' but \
1880 lacks following character offset"));
1881 badfieldspec (argv[i]);
1883 for (++s; digits[UCHAR (*s)]; ++s)
1884 t2 = 10 * t2 + *s - '0';
1885 if (t2 == 0)
1887 /* Provoke with `sort -k1.0' */
1888 error (0, 0, _("starting field character offset \
1889 argument to the `-k' option\nmust be positive"));
1890 badfieldspec (argv[i]);
1892 --t2;
1894 if (t2 || t)
1896 key->sword = t;
1897 key->schar = t2;
1899 else
1900 key->sword = -1;
1901 s = set_ordering (s, key, bl_start);
1902 if (*s == 0)
1904 key->eword = -1;
1905 key->echar = 0;
1907 else if (*s != ',')
1908 badfieldspec (argv[i]);
1909 else if (*s == ',')
1911 /* Skip over comma. */
1912 ++s;
1913 if (*s == 0)
1915 /* Provoke with `sort -k1,' */
1916 error (0, 0, _("field specification has `,' but \
1917 lacks following field spec"));
1918 badfieldspec (argv[i]);
1920 /* Get POS2. */
1921 for (t = 0; digits[UCHAR (*s)]; ++s)
1922 t = t * 10 + *s - '0';
1923 if (t == 0)
1925 /* Provoke with `sort -k1,0' */
1926 error (0, 0, _("ending field number argument \
1927 to the `-k' option must be positive"));
1928 badfieldspec (argv[i]);
1930 --t;
1931 t2 = 0;
1932 if (*s == '.')
1934 if (!digits[UCHAR (s[1])])
1936 /* Provoke with `sort -k1,1.' */
1937 error (0, 0, _("ending field spec has `.' \
1938 but lacks following character offset"));
1939 badfieldspec (argv[i]);
1941 for (++s; digits[UCHAR (*s)]; ++s)
1942 t2 = t2 * 10 + *s - '0';
1944 else
1946 /* `-k 2,3' is equivalent to `+1 -3'. */
1947 ++t;
1949 key->eword = t;
1950 key->echar = t2;
1951 s = set_ordering (s, key, bl_end);
1952 if (*s)
1953 badfieldspec (argv[i]);
1955 insertkey (key);
1956 key = NULL;
1957 goto outer;
1958 case 'm':
1959 mergeonly = 1;
1960 break;
1961 case 'o':
1962 if (s[1])
1963 outfile = s + 1;
1964 else
1966 if (i == argc - 1)
1967 error (SORT_FAILURE, 0,
1968 _("option `-o' requires an argument"));
1969 else
1970 outfile = argv[++i];
1972 goto outer;
1973 case 's':
1974 stable = 1;
1975 break;
1976 case 't':
1977 if (s[1])
1978 tab = *++s;
1979 else if (i < argc - 1)
1981 tab = *argv[++i];
1982 goto outer;
1984 else
1985 error (SORT_FAILURE, 0,
1986 _("option `-t' requires an argument"));
1987 break;
1988 case 'T':
1989 if (s[1])
1990 temp_file_prefix = ++s;
1991 else
1993 if (i < argc - 1)
1994 temp_file_prefix = argv[++i];
1995 else
1996 error (SORT_FAILURE, 0,
1997 _("option `-T' requires an argument"));
1999 goto outer;
2000 /* break; */
2001 case 'u':
2002 unique = 1;
2003 break;
2004 case 'z':
2005 eolchar = 0;
2006 break;
2007 case 'y':
2008 /* Accept and ignore e.g. -y0 for compatibility with
2009 Solaris 2. */
2010 goto outer;
2011 default:
2012 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2013 argv[0], *s);
2014 usage (SORT_FAILURE);
2016 if (*s)
2017 ++s;
2020 else /* Not an option. */
2022 files[nfiles++] = argv[i];
2024 outer:;
2027 if (key)
2028 insertkey (key);
2030 /* Inheritance of global options to individual keys. */
2031 for (key = keyhead.next; key; key = key->next)
2032 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2033 && !key->skipeblanks && !key->month && !key->numeric
2034 && !key->general_numeric)
2036 key->ignore = gkey.ignore;
2037 key->translate = gkey.translate;
2038 key->skipsblanks = gkey.skipsblanks;
2039 key->skipeblanks = gkey.skipeblanks;
2040 key->month = gkey.month;
2041 key->numeric = gkey.numeric;
2042 key->general_numeric = gkey.general_numeric;
2043 key->reverse = gkey.reverse;
2046 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2047 || gkey.skipeblanks || gkey.month || gkey.numeric
2048 || gkey.general_numeric))
2049 insertkey (&gkey);
2050 reverse = gkey.reverse;
2052 if (nfiles == 0)
2054 nfiles = 1;
2055 files = &minus;
2058 if (checkonly)
2060 /* POSIX requires that sort return 1 IFF invoked with -c and the
2061 input is not properly sorted. */
2062 exit (check (files, nfiles) == 0 ? 0 : 1);
2065 if (strcmp (outfile, "-"))
2067 struct stat outstat;
2068 if (stat (outfile, &outstat) == 0)
2070 /* The following code prevents a race condition when
2071 people use the brain dead shell programming idiom:
2072 cat file | sort -o file
2073 This feature is provided for historical compatibility,
2074 but we strongly discourage ever relying on this in
2075 new shell programs. */
2077 /* Temporarily copy each input file that might be another name
2078 for the output file. When in doubt (e.g. a pipe), copy. */
2079 for (i = 0; i < nfiles; ++i)
2081 char buf[8192];
2082 FILE *fp;
2083 int cc;
2085 if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
2087 struct stat instat;
2088 if ((strcmp (files[i], "-")
2089 ? stat (files[i], &instat)
2090 : fstat (STDIN_FILENO, &instat)) != 0)
2092 error (0, errno, "%s", files[i]);
2093 cleanup ();
2094 exit (SORT_FAILURE);
2096 if (S_ISREG (instat.st_mode)
2097 && (instat.st_ino != outstat.st_ino
2098 || instat.st_dev != outstat.st_dev))
2100 /* We know the files are distinct. */
2101 continue;
2105 fp = xfopen (files[i], "r");
2106 tmp = tempname ();
2107 ofp = xtmpfopen (tmp);
2108 while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
2109 write_bytes (buf, cc, ofp);
2110 if (ferror (fp))
2112 error (0, errno, "%s", files[i]);
2113 cleanup ();
2114 exit (SORT_FAILURE);
2116 xfclose (ofp);
2117 xfclose (fp);
2118 files[i] = tmp;
2121 ofp = xfopen (outfile, "w");
2123 else
2124 ofp = stdout;
2126 if (mergeonly)
2127 merge (files, nfiles, ofp);
2128 else
2129 sort (files, nfiles, ofp);
2130 cleanup ();
2132 /* If we wait for the implicit flush on exit, and the parent process
2133 has closed stdout (e.g., exec >&- in a shell), then the output file
2134 winds up empty. I don't understand why. This is under SunOS,
2135 Solaris, Ultrix, and Irix. This premature fflush makes the output
2136 reappear. --karl@cs.umb.edu */
2137 if (fflush (ofp) < 0)
2138 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2140 if (have_read_stdin && fclose (stdin) == EOF)
2141 error (SORT_FAILURE, errno, outfile);
2142 if (ferror (stdout) || fclose (stdout) == EOF)
2143 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2145 exit (EXIT_SUCCESS);