.
[coreutils.git] / src / sort.c
blob8e272329815af7bae32ac7d13983b70f2ccc18e9
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991, 1992, 1993, 1994, 1995 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
22 #include <config.h>
24 /* Get isblank from GNU libc. */
25 #define _GNU_SOURCE
27 #include <sys/types.h>
28 #include <signal.h>
29 #include <stdio.h>
30 #include "system.h"
31 #include "version.h"
32 #include "long-options.h"
33 #include "error.h"
34 #include "xstrtod.h"
36 #ifdef HAVE_LIMITS_H
37 #include <limits.h>
38 #else
39 #ifndef UCHAR_MAX
40 #define UCHAR_MAX 255
41 #endif
42 #endif
43 #ifndef STDC_HEADERS
44 char *malloc ();
45 char *realloc ();
46 void free ();
47 #endif
49 /* Undefine, to avoid warning about redefinition on some systems. */
50 #undef min
51 #define min(a, b) ((a) < (b) ? (a) : (b))
53 #define UCHAR_LIM (UCHAR_MAX + 1)
54 #define UCHAR(c) ((unsigned char) (c))
56 #ifndef DEFAULT_TMPDIR
57 #define DEFAULT_TMPDIR "/tmp"
58 #endif
60 /* The kind of blanks for '-b' to skip in various options. */
61 enum blanktype { bl_start, bl_end, bl_both };
63 /* The character marking end of line. Default to \n. */
64 int eolchar = '\n';
66 /* Lines are held in core as counted strings. */
67 struct line
69 char *text; /* Text of the line. */
70 int length; /* Length not including final newline. */
71 char *keybeg; /* Start of first key. */
72 char *keylim; /* Limit of first key. */
75 /* Arrays of lines. */
76 struct lines
78 struct line *lines; /* Dynamically allocated array of lines. */
79 int used; /* Number of slots used. */
80 int alloc; /* Number of slots allocated. */
81 int limit; /* Max number of slots to allocate. */
84 /* Input buffers. */
85 struct buffer
87 char *buf; /* Dynamically allocated buffer. */
88 int used; /* Number of bytes used. */
89 int alloc; /* Number of bytes allocated. */
90 int left; /* Number of bytes left after line parsing. */
93 struct keyfield
95 int sword; /* Zero-origin 'word' to start at. */
96 int schar; /* Additional characters to skip. */
97 int skipsblanks; /* Skip leading white space at start. */
98 int eword; /* Zero-origin first word after field. */
99 int echar; /* Additional characters in field. */
100 int skipeblanks; /* Skip trailing white space at finish. */
101 int *ignore; /* Boolean array of characters to ignore. */
102 char *translate; /* Translation applied to characters. */
103 int numeric; /* Flag for numeric comparison. Handle
104 strings of digits with optional decimal
105 point, but no exponential notation. */
106 int general_numeric; /* Flag for general, numeric comparison.
107 Handle numbers in exponential notation. */
108 int month; /* Flag for comparison by month name. */
109 int reverse; /* Reverse the sense of comparison. */
110 struct keyfield *next; /* Next keyfield to try. */
113 struct month
115 char *name;
116 int val;
119 /* The name this program was run with. */
120 char *program_name;
122 /* Table of digits. */
123 static int digits[UCHAR_LIM];
125 /* Table of white space. */
126 static int blanks[UCHAR_LIM];
128 /* Table of non-printing characters. */
129 static int nonprinting[UCHAR_LIM];
131 /* Table of non-dictionary characters (not letters, digits, or blanks). */
132 static int nondictionary[UCHAR_LIM];
134 /* Translation table folding lower case to upper. */
135 static char fold_toupper[UCHAR_LIM];
137 /* Table mapping 3-letter month names to integers.
138 Alphabetic order allows binary search. */
139 static struct month const monthtab[] =
141 {"APR", 4},
142 {"AUG", 8},
143 {"DEC", 12},
144 {"FEB", 2},
145 {"JAN", 1},
146 {"JUL", 7},
147 {"JUN", 6},
148 {"MAR", 3},
149 {"MAY", 5},
150 {"NOV", 11},
151 {"OCT", 10},
152 {"SEP", 9}
155 /* During the merge phase, the number of files to merge at once. */
156 #define NMERGE 16
158 /* Initial buffer size for in core sorting. Will not grow unless a
159 line longer than this is seen. */
160 static int sortalloc = 512 * 1024;
162 /* Initial buffer size for in core merge buffers. Bear in mind that
163 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
164 static int mergealloc = 16 * 1024;
166 /* Guess of average line length. */
167 static int linelength = 30;
169 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
170 #define LINEALLOC (256 * 1024)
172 /* Prefix for temporary file names. */
173 static char *temp_file_prefix;
175 /* Flag to reverse the order of all comparisons. */
176 static int reverse;
178 /* Flag for stable sort. This turns off the last ditch bytewise
179 comparison of lines, and instead leaves lines in the same order
180 they were read if all keys compare equal. */
181 static int stable;
183 /* Tab character separating fields. If NUL, then fields are separated
184 by the empty string between a non-whitespace character and a whitespace
185 character. */
186 static char tab;
188 /* Flag to remove consecutive duplicate lines from the output.
189 Only the last of a sequence of equal lines will be output. */
190 static int unique;
192 /* Nonzero if any of the input files are the standard input. */
193 static int have_read_stdin;
195 /* Lists of key field comparisons to be tried. */
196 static struct keyfield keyhead;
198 static void
199 usage (int status)
201 if (status != 0)
202 fprintf (stderr, _("Try `%s --help' for more information.\n"),
203 program_name);
204 else
206 printf (_("\
207 Usage: %s [OPTION]... [FILE]...\n\
209 program_name);
210 printf (_("\
211 Write sorted concatenation of all FILE(s) to standard output.\n\
213 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
214 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
215 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
216 -b ignore leading blanks in sort fields or keys\n\
217 -c check if given files already sorted, do not sort\n\
218 -d consider only [a-zA-Z0-9 ] characters in keys\n\
219 -f fold lower case to upper case characters in keys\n\
220 -g compare according to general numerical value, imply -b\n\
221 -i consider only [\\040-\\0176] characters in keys\n\
222 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
223 -m merge already sorted files, do not sort\n\
224 -n compare according to string numerical value, imply -b\n\
225 -o FILE write result on FILE instead of standard output\n\
226 -r reverse the result of comparisons\n\
227 -s stabilize sort by disabling last resort comparison\n\
228 -t SEP use SEParator instead of non- to whitespace transition\n\
229 -u with -c, check for strict ordering\n\
230 -u with -m, only output the first of an equal sequence\n\
231 -z end lines with 0 byte, not newline, for find -print0\n\
232 --help display this help and exit\n\
233 --version output version information and exit\n\
235 POS is F[.C][OPTS], where F is the field number and C the character\n\
236 position in the field, both counted from zero. OPTS is made up of one\n\
237 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
238 for that key. If no key given, use the entire line as key. With no\n\
239 FILE, or when FILE is -, read standard input.\n\
241 , DEFAULT_TMPDIR);
243 exit (status);
246 /* The list of temporary files. */
247 static struct tempnode
249 char *name;
250 struct tempnode *next;
251 } temphead;
253 /* Clean up any remaining temporary files. */
255 static void
256 cleanup (void)
258 struct tempnode *node;
260 for (node = temphead.next; node; node = node->next)
261 unlink (node->name);
264 /* Allocate N bytes of memory dynamically, with error checking. */
266 static char *
267 xmalloc (unsigned int n)
269 char *p;
271 p = malloc (n);
272 if (p == 0)
274 error (0, 0, _("virtual memory exhausted"));
275 cleanup ();
276 exit (2);
278 return p;
281 /* Change the size of an allocated block of memory P to N bytes,
282 with error checking.
283 If P is NULL, run xmalloc.
284 If N is 0, run free and return NULL. */
286 static char *
287 xrealloc (char *p, unsigned int n)
289 if (p == 0)
290 return xmalloc (n);
291 if (n == 0)
293 free (p);
294 return 0;
296 p = realloc (p, n);
297 if (p == 0)
299 error (0, 0, _("virtual memory exhausted"));
300 cleanup ();
301 exit (2);
303 return p;
306 static FILE *
307 xtmpfopen (const char *file)
309 FILE *fp;
310 int fd;
312 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
313 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
315 error (0, errno, "%s", file);
316 cleanup ();
317 exit (2);
320 return fp;
323 static FILE *
324 xfopen (const char *file, const char *how)
326 FILE *fp;
328 if (strcmp (file, "-") == 0)
330 fp = stdin;
332 else
334 if ((fp = fopen (file, how)) == NULL)
336 error (0, errno, "%s", file);
337 cleanup ();
338 exit (2);
342 if (fp == stdin)
343 have_read_stdin = 1;
344 return fp;
347 static void
348 xfclose (FILE *fp)
350 if (fp == stdin)
352 /* Allow reading stdin from tty more than once. */
353 if (feof (fp))
354 clearerr (fp);
356 else if (fp == stdout)
358 if (fflush (fp) != 0)
360 error (0, errno, _("flushing file"));
361 cleanup ();
362 exit (2);
365 else
367 if (fclose (fp) != 0)
369 error (0, errno, _("error closing file"));
370 cleanup ();
371 exit (2);
376 static void
377 write_bytes (const char *buf, size_t n_bytes, FILE *fp)
379 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
381 error (0, errno, _("write error"));
382 cleanup ();
383 exit (2);
387 /* Return a name for a temporary file. */
389 static char *
390 tempname (void)
392 static unsigned int seq;
393 int len = strlen (temp_file_prefix);
394 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
395 struct tempnode *node;
397 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
398 sprintf (name,
399 "%s%ssort%5.5d%5.5d",
400 temp_file_prefix,
401 (len && temp_file_prefix[len - 1] != '/') ? "/" : "",
402 (unsigned int) getpid () & 0xffff, seq);
404 /* Make sure that SEQ's value fits in 5 digits. */
405 ++seq;
406 if (seq >= 100000)
407 seq = 0;
409 node->name = name;
410 node->next = temphead.next;
411 temphead.next = node;
412 return name;
415 /* Search through the list of temporary files for NAME;
416 remove it if it is found on the list. */
418 static void
419 zaptemp (char *name)
421 struct tempnode *node, *temp;
423 for (node = &temphead; node->next; node = node->next)
424 if (!strcmp (name, node->next->name))
425 break;
426 if (node->next)
428 temp = node->next;
429 unlink (temp->name);
430 free (temp->name);
431 node->next = temp->next;
432 free ((char *) temp);
436 /* Initialize the character class tables. */
438 static void
439 inittables (void)
441 int i;
443 for (i = 0; i < UCHAR_LIM; ++i)
445 if (ISBLANK (i))
446 blanks[i] = 1;
447 if (ISDIGIT (i))
448 digits[i] = 1;
449 if (!ISPRINT (i))
450 nonprinting[i] = 1;
451 if (!ISALNUM (i) && !ISBLANK (i))
452 nondictionary[i] = 1;
453 if (ISLOWER (i))
454 fold_toupper[i] = toupper (i);
455 else
456 fold_toupper[i] = i;
460 /* Initialize BUF, allocating ALLOC bytes initially. */
462 static void
463 initbuf (struct buffer *buf, int alloc)
465 buf->alloc = alloc;
466 buf->buf = xmalloc (buf->alloc);
467 buf->used = buf->left = 0;
470 /* Fill BUF reading from FP, moving buf->left bytes from the end
471 of buf->buf to the beginning first. If EOF is reached and the
472 file wasn't terminated by a newline, supply one. Return a count
473 of bytes buffered. */
475 static int
476 fillbuf (struct buffer *buf, FILE *fp)
478 int cc;
480 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
481 buf->used = buf->left;
483 while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, eolchar, buf->used)))
485 if (buf->used == buf->alloc)
487 buf->alloc *= 2;
488 buf->buf = xrealloc (buf->buf, buf->alloc);
490 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
491 if (ferror (fp))
493 error (0, errno, _("read error"));
494 cleanup ();
495 exit (2);
497 buf->used += cc;
500 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
502 if (buf->used == buf->alloc)
504 buf->alloc *= 2;
505 buf->buf = xrealloc (buf->buf, buf->alloc);
507 buf->buf[buf->used++] = eolchar;
510 return buf->used;
513 /* Initialize LINES, allocating space for ALLOC lines initially.
514 LIMIT is the maximum possible number of lines to allocate space
515 for, ever. */
517 static void
518 initlines (struct lines *lines, int alloc, int limit)
520 lines->alloc = alloc;
521 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
522 lines->used = 0;
523 lines->limit = limit;
526 /* Return a pointer to the first character of the field specified
527 by KEY in LINE. */
529 static char *
530 begfield (const struct line *line, const struct keyfield *key)
532 register char *ptr = line->text, *lim = ptr + line->length;
533 register int sword = key->sword, schar = key->schar;
535 if (tab)
536 while (ptr < lim && sword--)
538 while (ptr < lim && *ptr != tab)
539 ++ptr;
540 if (ptr < lim)
541 ++ptr;
543 else
544 while (ptr < lim && sword--)
546 while (ptr < lim && blanks[UCHAR (*ptr)])
547 ++ptr;
548 while (ptr < lim && !blanks[UCHAR (*ptr)])
549 ++ptr;
552 if (key->skipsblanks)
553 while (ptr < lim && blanks[UCHAR (*ptr)])
554 ++ptr;
556 if (ptr + schar <= lim)
557 ptr += schar;
558 else
559 ptr = lim;
561 return ptr;
564 /* Return the limit of (a pointer to the first character after) the field
565 in LINE specified by KEY. */
567 static char *
568 limfield (const struct line *line, const struct keyfield *key)
570 register char *ptr = line->text, *lim = ptr + line->length;
571 register int eword = key->eword, echar = key->echar;
573 /* Note: from the POSIX spec:
574 The leading field separator itself is included in
575 a field when -t is not used. FIXME: move this comment up... */
577 /* Move PTR past EWORD fields or to one past the last byte on LINE,
578 whichever comes first. If there are more than EWORD fields, leave
579 PTR pointing at the beginning of the field having zero-based index,
580 EWORD. If a delimiter character was specified (via -t), then that
581 `beginning' is the first character following the delimiting TAB.
582 Otherwise, leave PTR pointing at the first `blank' character after
583 the preceding field. */
584 if (tab)
585 while (ptr < lim && eword--)
587 while (ptr < lim && *ptr != tab)
588 ++ptr;
589 if (ptr < lim && (eword || echar > 0))
590 ++ptr;
592 else
593 while (ptr < lim && eword--)
595 while (ptr < lim && blanks[UCHAR (*ptr)])
596 ++ptr;
597 while (ptr < lim && !blanks[UCHAR (*ptr)])
598 ++ptr;
601 /* Make LIM point to the end of (one byte past) the current field. */
602 if (tab)
604 char *newlim;
605 newlim = memchr (ptr, tab, lim - ptr);
606 if (newlim)
607 lim = newlim;
609 else
611 char *newlim;
612 newlim = ptr;
613 while (newlim < lim && blanks[UCHAR (*newlim)])
614 ++newlim;
615 while (newlim < lim && !blanks[UCHAR (*newlim)])
616 ++newlim;
617 lim = newlim;
620 /* If we're skipping leading blanks, don't start counting characters
621 until after skipping past any leading blanks. */
622 if (key->skipsblanks)
623 while (ptr < lim && blanks[UCHAR (*ptr)])
624 ++ptr;
626 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
627 if (ptr + echar <= lim)
628 ptr += echar;
629 else
630 ptr = lim;
632 return ptr;
635 /* FIXME */
637 void
638 trim_trailing_blanks (const char *a_start, char **a_end)
640 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
641 --(*a_end);
644 /* Find the lines in BUF, storing pointers and lengths in LINES.
645 Also replace newlines in BUF with NULs. */
647 static void
648 findlines (struct buffer *buf, struct lines *lines)
650 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
651 struct keyfield *key = keyhead.next;
653 lines->used = 0;
655 while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))
656 && lines->used < lines->limit)
658 /* There are various places in the code that rely on a NUL
659 being at the end of in-core lines; NULs inside the lines
660 will not cause trouble, though. */
661 *ptr = '\0';
663 if (lines->used == lines->alloc)
665 lines->alloc *= 2;
666 lines->lines = (struct line *)
667 xrealloc ((char *) lines->lines,
668 lines->alloc * sizeof (struct line));
671 lines->lines[lines->used].text = beg;
672 lines->lines[lines->used].length = ptr - beg;
674 /* Precompute the position of the first key for efficiency. */
675 if (key)
677 if (key->eword >= 0)
678 lines->lines[lines->used].keylim =
679 limfield (&lines->lines[lines->used], key);
680 else
681 lines->lines[lines->used].keylim = ptr;
683 if (key->sword >= 0)
684 lines->lines[lines->used].keybeg =
685 begfield (&lines->lines[lines->used], key);
686 else
688 if (key->skipsblanks)
689 while (blanks[UCHAR (*beg)])
690 ++beg;
691 lines->lines[lines->used].keybeg = beg;
693 if (key->skipeblanks)
695 trim_trailing_blanks (lines->lines[lines->used].keybeg,
696 &lines->lines[lines->used].keylim);
699 else
701 lines->lines[lines->used].keybeg = 0;
702 lines->lines[lines->used].keylim = 0;
705 ++lines->used;
706 beg = ptr + 1;
709 buf->left = lim - beg;
712 /* Compare strings A and B containing decimal fractions < 1. Each string
713 should begin with a decimal point followed immediately by the digits
714 of the fraction. Strings not of this form are considered to be zero. */
716 static int
717 fraccompare (register const char *a, register const char *b)
719 register tmpa = UCHAR (*a), tmpb = UCHAR (*b);
721 if (tmpa == '.' && tmpb == '.')
724 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
725 while (tmpa == tmpb && digits[tmpa]);
726 if (digits[tmpa] && digits[tmpb])
727 return tmpa - tmpb;
728 if (digits[tmpa])
730 while (tmpa == '0')
731 tmpa = UCHAR (*++a);
732 if (digits[tmpa])
733 return 1;
734 return 0;
736 if (digits[tmpb])
738 while (tmpb == '0')
739 tmpb = UCHAR (*++b);
740 if (digits[tmpb])
741 return -1;
742 return 0;
744 return 0;
746 else if (tmpa == '.')
749 tmpa = UCHAR (*++a);
750 while (tmpa == '0');
751 if (digits[tmpa])
752 return 1;
753 return 0;
755 else if (tmpb == '.')
758 tmpb = UCHAR (*++b);
759 while (tmpb == '0');
760 if (digits[tmpb])
761 return -1;
762 return 0;
764 return 0;
767 /* Compare strings A and B as numbers without explicitly converting them to
768 machine numbers. Comparatively slow for short strings, but asymptotically
769 hideously fast. */
771 static int
772 numcompare (register const char *a, register const char *b)
774 register int tmpa, tmpb, loga, logb, tmp;
776 tmpa = UCHAR (*a);
777 tmpb = UCHAR (*b);
779 while (blanks[tmpa])
780 tmpa = UCHAR (*++a);
781 while (blanks[tmpb])
782 tmpb = UCHAR (*++b);
784 if (tmpa == '-')
787 tmpa = UCHAR (*++a);
788 while (tmpa == '0');
789 if (tmpb != '-')
791 if (tmpa == '.')
793 tmpa = UCHAR (*++a);
794 while (tmpa == '0');
795 if (digits[tmpa])
796 return -1;
797 while (tmpb == '0')
798 tmpb = UCHAR (*++b);
799 if (tmpb == '.')
801 tmpb = *++b;
802 while (tmpb == '0');
803 if (digits[tmpb])
804 return -1;
805 return 0;
808 tmpb = UCHAR (*++b);
809 while (tmpb == '0');
811 while (tmpa == tmpb && digits[tmpa])
812 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
814 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
815 return -fraccompare (a, b);
817 if (digits[tmpa])
818 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
820 else
821 loga = 0;
823 if (digits[tmpb])
824 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
826 else
827 logb = 0;
829 if ((tmp = logb - loga) != 0)
830 return tmp;
832 if (!loga)
833 return 0;
835 return tmpb - tmpa;
837 else if (tmpb == '-')
840 tmpb = UCHAR (*++b);
841 while (tmpb == '0');
842 if (tmpb == '.')
844 tmpb = *++b;
845 while (tmpb == '0');
846 if (digits[tmpb])
847 return 1;
848 while (tmpa == '0')
849 tmpa = UCHAR (*++a);
850 if (tmpa == '.')
852 tmpa = UCHAR (*++a);
853 while (tmpa == '0');
854 if (digits[tmpa])
855 return 1;
856 return 0;
858 else
860 while (tmpa == '0')
861 tmpa = UCHAR (*++a);
862 while (tmpb == '0')
863 tmpb = UCHAR (*++b);
865 while (tmpa == tmpb && digits[tmpa])
866 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
868 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
869 return fraccompare (a, b);
871 if (digits[tmpa])
872 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
874 else
875 loga = 0;
877 if (digits[tmpb])
878 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
880 else
881 logb = 0;
883 if ((tmp = loga - logb) != 0)
884 return tmp;
886 if (!loga)
887 return 0;
889 return tmpa - tmpb;
893 static int
894 general_numcompare (const char *sa, const char *sb)
896 double a, b;
897 /* FIXME: add option to warn about failed conversions. */
898 /* FIXME: maybe add option to try expensive FP conversion
899 only if A and B can't be compared more cheaply/accurately. */
900 if (xstrtod (sa, NULL, &a))
902 a = 0;
904 if (xstrtod (sb, NULL, &b))
906 b = 0;
908 return a == b ? 0 : a < b ? -1 : 1;
911 /* Return an integer <= 12 associated with month name S with length LEN,
912 0 if the name in S is not recognized. */
914 static int
915 getmonth (const char *s, int len)
917 char month[4];
918 register int i, lo = 0, hi = 12;
920 while (len > 0 && blanks[UCHAR(*s)])
921 ++s, --len;
923 if (len < 3)
924 return 0;
926 for (i = 0; i < 3; ++i)
927 month[i] = fold_toupper[UCHAR (s[i])];
928 month[3] = '\0';
930 while (hi - lo > 1)
931 if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
932 hi = (lo + hi) / 2;
933 else
934 lo = (lo + hi) / 2;
935 if (!strcmp (month, monthtab[lo].name))
936 return monthtab[lo].val;
937 return 0;
940 /* Compare two lines A and B trying every key in sequence until there
941 are no more keys or a difference is found. */
943 static int
944 keycompare (const struct line *a, const struct line *b)
946 register char *texta, *textb, *lima, *limb, *translate;
947 register int *ignore;
948 struct keyfield *key;
949 int diff = 0, iter = 0, lena, lenb;
951 for (key = keyhead.next; key; key = key->next, ++iter)
953 ignore = key->ignore;
954 translate = key->translate;
956 /* Find the beginning and limit of each field. */
957 if (iter || a->keybeg == NULL || b->keybeg == NULL)
959 if (key->eword >= 0)
960 lima = limfield (a, key), limb = limfield (b, key);
961 else
962 lima = a->text + a->length, limb = b->text + b->length;
964 if (key->sword >= 0)
965 texta = begfield (a, key), textb = begfield (b, key);
966 else
968 texta = a->text, textb = b->text;
969 if (key->skipsblanks)
971 while (texta < lima && blanks[UCHAR (*texta)])
972 ++texta;
973 while (textb < limb && blanks[UCHAR (*textb)])
974 ++textb;
978 else
980 /* For the first iteration only, the key positions have
981 been precomputed for us. */
982 texta = a->keybeg, lima = a->keylim;
983 textb = b->keybeg, limb = b->keylim;
986 /* Find the lengths. */
987 lena = lima - texta, lenb = limb - textb;
988 if (lena < 0)
989 lena = 0;
990 if (lenb < 0)
991 lenb = 0;
993 if (key->skipeblanks)
995 char *a_end = texta + lena;
996 char *b_end = textb + lenb;
997 trim_trailing_blanks (texta, &a_end);
998 trim_trailing_blanks (textb, &b_end);
999 lena = a_end - texta;
1000 lenb = b_end - textb;
1003 /* Actually compare the fields. */
1004 if (key->numeric)
1006 if (*lima || *limb)
1008 char savea = *lima, saveb = *limb;
1010 *lima = *limb = '\0';
1011 diff = numcompare (texta, textb);
1012 *lima = savea, *limb = saveb;
1014 else
1015 diff = numcompare (texta, textb);
1017 if (diff)
1018 return key->reverse ? -diff : diff;
1019 continue;
1021 else if (key->general_numeric)
1023 if (*lima || *limb)
1025 char savea = *lima, saveb = *limb;
1027 *lima = *limb = '\0';
1028 diff = general_numcompare (texta, textb);
1029 *lima = savea, *limb = saveb;
1031 else
1032 diff = general_numcompare (texta, textb);
1034 if (diff)
1035 return key->reverse ? -diff : diff;
1036 continue;
1038 else if (key->month)
1040 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1041 if (diff)
1042 return key->reverse ? -diff : diff;
1043 continue;
1045 else if (ignore && translate)
1047 #define CMP_WITH_IGNORE(A, B) \
1048 do \
1050 while (texta < lima && textb < limb) \
1052 while (texta < lima && ignore[UCHAR (*texta)]) \
1053 ++texta; \
1054 while (textb < limb && ignore[UCHAR (*textb)]) \
1055 ++textb; \
1056 if (texta < lima && textb < limb) \
1058 if ((A) != (B)) \
1060 diff = (A) - (B); \
1061 break; \
1063 ++texta; \
1064 ++textb; \
1067 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1068 diff = -1; \
1069 else if (texta < lima && textb == limb \
1070 && !ignore[UCHAR (*texta)]) \
1071 diff = 1; \
1074 if (diff == 0) \
1076 while (texta < lima && ignore[UCHAR (*texta)]) \
1077 ++texta; \
1078 while (textb < limb && ignore[UCHAR (*textb)]) \
1079 ++textb; \
1081 if (texta == lima && textb < limb) \
1082 diff = -1; \
1083 else if (texta < lima && textb == limb) \
1084 diff = 1; \
1086 /* Relative lengths are meaningless if characters were ignored. \
1087 Handling this case here avoids what might be an invalid length \
1088 comparison below. */ \
1089 if (diff == 0 && texta == lima && textb == limb) \
1090 return 0; \
1092 while (0)
1094 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1095 else if (ignore)
1096 CMP_WITH_IGNORE (*texta, *textb);
1097 else if (translate)
1098 while (texta < lima && textb < limb)
1100 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1102 diff = (translate[UCHAR (*--texta)]
1103 - translate[UCHAR (*--textb)]);
1104 break;
1107 else
1108 diff = memcmp (texta, textb, min (lena, lenb));
1110 if (diff)
1111 return key->reverse ? -diff : diff;
1112 if ((diff = lena - lenb) != 0)
1113 return key->reverse ? -diff : diff;
1116 return 0;
1119 /* Compare two lines A and B, returning negative, zero, or positive
1120 depending on whether A compares less than, equal to, or greater than B. */
1122 static int
1123 compare (register const struct line *a, register const struct line *b)
1125 int diff, tmpa, tmpb, mini;
1127 /* First try to compare on the specified keys (if any).
1128 The only two cases with no key at all are unadorned sort,
1129 and unadorned sort -r. */
1130 if (keyhead.next)
1132 diff = keycompare (a, b);
1133 if (diff != 0)
1134 return diff;
1135 if (unique || stable)
1136 return 0;
1139 /* If the keys all compare equal (or no keys were specified)
1140 fall through to the default byte-by-byte comparison. */
1141 tmpa = a->length, tmpb = b->length;
1142 mini = min (tmpa, tmpb);
1143 if (mini == 0)
1144 diff = tmpa - tmpb;
1145 else
1147 char *ap = a->text, *bp = b->text;
1149 diff = UCHAR (*ap) - UCHAR (*bp);
1150 if (diff == 0)
1152 diff = memcmp (ap, bp, mini);
1153 if (diff == 0)
1154 diff = tmpa - tmpb;
1158 return reverse ? -diff : diff;
1161 /* Check that the lines read from the given FP come in order. Return
1162 1 if they do and 0 if there is a disorder.
1163 FIXME: return number of first out-of-order line if not sorted. */
1165 static int
1166 checkfp (FILE *fp)
1168 struct buffer buf; /* Input buffer. */
1169 struct lines lines; /* Lines scanned from the buffer. */
1170 struct line temp; /* Copy of previous line. */
1171 int cc; /* Character count. */
1172 int alloc, sorted = 1;
1174 initbuf (&buf, mergealloc);
1175 initlines (&lines, mergealloc / linelength + 1,
1176 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1177 alloc = linelength;
1178 temp.text = xmalloc (alloc);
1180 cc = fillbuf (&buf, fp);
1181 if (cc == 0)
1182 goto finish;
1184 findlines (&buf, &lines);
1186 while (1)
1188 struct line *prev_line; /* Pointer to previous line. */
1189 int cmp; /* Result of calling compare. */
1190 int i;
1192 /* Compare each line in the buffer with its successor. */
1193 for (i = 0; i < lines.used - 1; ++i)
1195 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1196 if ((unique && cmp >= 0) || (cmp > 0))
1198 sorted = 0;
1199 goto finish;
1203 /* Save the last line of the buffer and refill the buffer. */
1204 prev_line = lines.lines + (lines.used - 1);
1205 if (prev_line->length > alloc)
1207 while (prev_line->length + 1 > alloc)
1208 alloc *= 2;
1209 temp.text = xrealloc (temp.text, alloc);
1211 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1212 temp.length = prev_line->length;
1213 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1214 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1216 cc = fillbuf (&buf, fp);
1217 if (cc == 0)
1218 break;
1220 findlines (&buf, &lines);
1221 /* Make sure the line saved from the old buffer contents is
1222 less than or equal to the first line of the new buffer. */
1223 cmp = compare (&temp, &lines.lines[0]);
1224 if ((unique && cmp >= 0) || (cmp > 0))
1226 sorted = 0;
1227 break;
1231 finish:
1232 xfclose (fp);
1233 free (buf.buf);
1234 free ((char *) lines.lines);
1235 free (temp.text);
1236 return sorted;
1239 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1240 Close FPS before returning. */
1242 static void
1243 mergefps (FILE **fps, register int nfps, FILE *ofp)
1245 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1246 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1247 struct line saved; /* Saved line for unique check. */
1248 int savedflag = 0; /* True if there is a saved line. */
1249 int savealloc; /* Size allocated for the saved line. */
1250 int cur[NMERGE]; /* Current line in each line table. */
1251 int ord[NMERGE]; /* Table representing a permutation of fps,
1252 such that lines[ord[0]].lines[cur[ord[0]]]
1253 is the smallest line and will be next
1254 output. */
1255 register int i, j, t;
1257 #ifdef lint /* Suppress `used before initialized' warning. */
1258 savealloc = 0;
1259 #endif
1261 /* Allocate space for a saved line if necessary. */
1262 if (unique)
1264 savealloc = linelength;
1265 saved.text = xmalloc (savealloc);
1268 /* Read initial lines from each input file. */
1269 for (i = 0; i < nfps; ++i)
1271 initbuf (&buffer[i], mergealloc);
1272 /* If a file is empty, eliminate it from future consideration. */
1273 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1275 xfclose (fps[i]);
1276 --nfps;
1277 for (j = i; j < nfps; ++j)
1278 fps[j] = fps[j + 1];
1280 if (i == nfps)
1281 free (buffer[i].buf);
1282 else
1284 initlines (&lines[i], mergealloc / linelength + 1,
1285 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1286 findlines (&buffer[i], &lines[i]);
1287 cur[i] = 0;
1291 /* Set up the ord table according to comparisons among input lines.
1292 Since this only reorders two items if one is strictly greater than
1293 the other, it is stable. */
1294 for (i = 0; i < nfps; ++i)
1295 ord[i] = i;
1296 for (i = 1; i < nfps; ++i)
1297 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1298 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1299 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1301 /* Repeatedly output the smallest line until no input remains. */
1302 while (nfps)
1304 /* If uniqified output is turned on, output only the first of
1305 an identical series of lines. */
1306 if (unique)
1308 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1310 write_bytes (saved.text, saved.length, ofp);
1311 putc (eolchar, ofp);
1312 savedflag = 0;
1314 if (!savedflag)
1316 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1318 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1319 savealloc *= 2;
1320 saved.text = xrealloc (saved.text, savealloc);
1322 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1323 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1324 saved.length + 1);
1325 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1327 saved.keybeg = saved.text +
1328 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1329 - lines[ord[0]].lines[cur[ord[0]]].text);
1331 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1333 saved.keylim = saved.text +
1334 (lines[ord[0]].lines[cur[ord[0]]].keylim
1335 - lines[ord[0]].lines[cur[ord[0]]].text);
1337 savedflag = 1;
1340 else
1342 write_bytes (lines[ord[0]].lines[cur[ord[0]]].text,
1343 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1344 putc (eolchar, ofp);
1347 /* Check if we need to read more lines into core. */
1348 if (++cur[ord[0]] == lines[ord[0]].used)
1349 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1351 findlines (&buffer[ord[0]], &lines[ord[0]]);
1352 cur[ord[0]] = 0;
1354 else
1356 /* We reached EOF on fps[ord[0]]. */
1357 for (i = 1; i < nfps; ++i)
1358 if (ord[i] > ord[0])
1359 --ord[i];
1360 --nfps;
1361 xfclose (fps[ord[0]]);
1362 free (buffer[ord[0]].buf);
1363 free ((char *) lines[ord[0]].lines);
1364 for (i = ord[0]; i < nfps; ++i)
1366 fps[i] = fps[i + 1];
1367 buffer[i] = buffer[i + 1];
1368 lines[i] = lines[i + 1];
1369 cur[i] = cur[i + 1];
1371 for (i = 0; i < nfps; ++i)
1372 ord[i] = ord[i + 1];
1373 continue;
1376 /* The new line just read in may be larger than other lines
1377 already in core; push it back in the queue until we encounter
1378 a line larger than it. */
1379 for (i = 1; i < nfps; ++i)
1381 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1382 &lines[ord[i]].lines[cur[ord[i]]]);
1383 if (!t)
1384 t = ord[0] - ord[i];
1385 if (t < 0)
1386 break;
1388 t = ord[0];
1389 for (j = 1; j < i; ++j)
1390 ord[j - 1] = ord[j];
1391 ord[i - 1] = t;
1394 if (unique && savedflag)
1396 write_bytes (saved.text, saved.length, ofp);
1397 putc (eolchar, ofp);
1398 free (saved.text);
1402 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1404 static void
1405 sortlines (struct line *lines, int nlines, struct line *temp)
1407 register struct line *lo, *hi, *t;
1408 register int nlo, nhi;
1410 if (nlines == 2)
1412 if (compare (&lines[0], &lines[1]) > 0)
1413 *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1414 return;
1417 nlo = nlines / 2;
1418 lo = lines;
1419 nhi = nlines - nlo;
1420 hi = lines + nlo;
1422 if (nlo > 1)
1423 sortlines (lo, nlo, temp);
1425 if (nhi > 1)
1426 sortlines (hi, nhi, temp);
1428 t = temp;
1430 while (nlo && nhi)
1431 if (compare (lo, hi) <= 0)
1432 *t++ = *lo++, --nlo;
1433 else
1434 *t++ = *hi++, --nhi;
1435 while (nlo--)
1436 *t++ = *lo++;
1438 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1439 *lo++ = *t++;
1442 /* Check that each of the NFILES FILES is ordered.
1443 Return a count of disordered files. */
1445 static int
1446 check (char **files, int nfiles)
1448 int i, disorders = 0;
1449 FILE *fp;
1451 for (i = 0; i < nfiles; ++i)
1453 fp = xfopen (files[i], "r");
1454 if (!checkfp (fp))
1456 fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]);
1457 ++disorders;
1460 return disorders;
1463 /* Merge NFILES FILES onto OFP. */
1465 static void
1466 merge (char **files, int nfiles, FILE *ofp)
1468 int i, j, t;
1469 char *temp;
1470 FILE *fps[NMERGE], *tfp;
1472 while (nfiles > NMERGE)
1474 t = 0;
1475 for (i = 0; i < nfiles / NMERGE; ++i)
1477 for (j = 0; j < NMERGE; ++j)
1478 fps[j] = xfopen (files[i * NMERGE + j], "r");
1479 tfp = xtmpfopen (temp = tempname ());
1480 mergefps (fps, NMERGE, tfp);
1481 xfclose (tfp);
1482 for (j = 0; j < NMERGE; ++j)
1483 zaptemp (files[i * NMERGE + j]);
1484 files[t++] = temp;
1486 for (j = 0; j < nfiles % NMERGE; ++j)
1487 fps[j] = xfopen (files[i * NMERGE + j], "r");
1488 tfp = xtmpfopen (temp = tempname ());
1489 mergefps (fps, nfiles % NMERGE, tfp);
1490 xfclose (tfp);
1491 for (j = 0; j < nfiles % NMERGE; ++j)
1492 zaptemp (files[i * NMERGE + j]);
1493 files[t++] = temp;
1494 nfiles = t;
1497 for (i = 0; i < nfiles; ++i)
1498 fps[i] = xfopen (files[i], "r");
1499 mergefps (fps, i, ofp);
1500 for (i = 0; i < nfiles; ++i)
1501 zaptemp (files[i]);
1504 /* Sort NFILES FILES onto OFP. */
1506 static void
1507 sort (char **files, int nfiles, FILE *ofp)
1509 struct buffer buf;
1510 struct lines lines;
1511 struct line *tmp;
1512 int i, ntmp;
1513 FILE *fp, *tfp;
1514 struct tempnode *node;
1515 int n_temp_files = 0;
1516 char **tempfiles;
1518 initbuf (&buf, sortalloc);
1519 initlines (&lines, sortalloc / linelength + 1,
1520 LINEALLOC / sizeof (struct line));
1521 ntmp = lines.alloc;
1522 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1524 while (nfiles--)
1526 fp = xfopen (*files++, "r");
1527 while (fillbuf (&buf, fp))
1529 findlines (&buf, &lines);
1530 if (lines.used > ntmp)
1532 while (lines.used > ntmp)
1533 ntmp *= 2;
1534 tmp = (struct line *)
1535 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1537 sortlines (lines.lines, lines.used, tmp);
1538 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1539 tfp = ofp;
1540 else
1542 ++n_temp_files;
1543 tfp = xtmpfopen (tempname ());
1545 for (i = 0; i < lines.used; ++i)
1546 if (!unique || i == 0
1547 || compare (&lines.lines[i], &lines.lines[i - 1]))
1549 write_bytes (lines.lines[i].text, lines.lines[i].length, tfp);
1550 putc (eolchar, tfp);
1552 if (tfp != ofp)
1553 xfclose (tfp);
1555 xfclose (fp);
1558 free (buf.buf);
1559 free ((char *) lines.lines);
1560 free ((char *) tmp);
1562 if (n_temp_files)
1564 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1565 i = n_temp_files;
1566 for (node = temphead.next; i > 0; node = node->next)
1567 tempfiles[--i] = node->name;
1568 merge (tempfiles, n_temp_files, ofp);
1569 free ((char *) tempfiles);
1573 /* Insert key KEY at the end of the list (`keyhead'). */
1575 static void
1576 insertkey (struct keyfield *key)
1578 struct keyfield *k = &keyhead;
1580 while (k->next)
1581 k = k->next;
1582 k->next = key;
1583 key->next = NULL;
1586 static void
1587 badfieldspec (const char *s)
1589 error (2, 0, _("invalid field specification `%s'"), s);
1592 /* Handle interrupts and hangups. */
1594 static void
1595 sighandler (int sig)
1597 #ifdef SA_INTERRUPT
1598 struct sigaction sigact;
1600 sigact.sa_handler = SIG_DFL;
1601 sigemptyset (&sigact.sa_mask);
1602 sigact.sa_flags = 0;
1603 sigaction (sig, &sigact, NULL);
1604 #else /* !SA_INTERRUPT */
1605 signal (sig, SIG_DFL);
1606 #endif /* SA_INTERRUPT */
1607 cleanup ();
1608 kill (getpid (), sig);
1611 /* Set the ordering options for KEY specified in S.
1612 Return the address of the first character in S that
1613 is not a valid ordering option.
1614 BLANKTYPE is the kind of blanks that 'b' should skip. */
1616 static char *
1617 set_ordering (register const char *s, struct keyfield *key,
1618 enum blanktype blanktype)
1620 while (*s)
1622 switch (*s)
1624 case 'b':
1625 if (blanktype == bl_start || blanktype == bl_both)
1626 key->skipsblanks = 1;
1627 if (blanktype == bl_end || blanktype == bl_both)
1628 key->skipeblanks = 1;
1629 break;
1630 case 'd':
1631 key->ignore = nondictionary;
1632 break;
1633 case 'f':
1634 key->translate = fold_toupper;
1635 break;
1636 case 'g':
1637 key->general_numeric = 1;
1638 break;
1639 case 'i':
1640 key->ignore = nonprinting;
1641 break;
1642 case 'M':
1643 key->month = 1;
1644 break;
1645 case 'n':
1646 key->numeric = 1;
1647 if (blanktype == bl_start || blanktype == bl_both)
1648 key->skipsblanks = 1;
1649 if (blanktype == bl_end || blanktype == bl_both)
1650 key->skipeblanks = 1;
1651 break;
1652 case 'r':
1653 key->reverse = 1;
1654 break;
1655 default:
1656 return (char *) s;
1658 ++s;
1660 return (char *) s;
1663 void
1664 main (int argc, char **argv)
1666 struct keyfield *key = NULL, gkey;
1667 char *s;
1668 int i, t, t2;
1669 int checkonly = 0, mergeonly = 0, nfiles = 0;
1670 char *minus = "-", *outfile = minus, **files, *tmp;
1671 FILE *ofp;
1672 #ifdef SA_INTERRUPT
1673 struct sigaction oldact, newact;
1674 #endif /* SA_INTERRUPT */
1676 program_name = argv[0];
1677 setlocale (LC_ALL, "");
1678 bindtextdomain (PACKAGE, LOCALEDIR);
1679 textdomain (PACKAGE);
1681 parse_long_options (argc, argv, "sort", version_string, usage);
1683 have_read_stdin = 0;
1684 inittables ();
1686 temp_file_prefix = getenv ("TMPDIR");
1687 if (temp_file_prefix == NULL)
1688 temp_file_prefix = DEFAULT_TMPDIR;
1690 #ifdef SA_INTERRUPT
1691 newact.sa_handler = sighandler;
1692 sigemptyset (&newact.sa_mask);
1693 newact.sa_flags = 0;
1695 sigaction (SIGINT, NULL, &oldact);
1696 if (oldact.sa_handler != SIG_IGN)
1697 sigaction (SIGINT, &newact, NULL);
1698 sigaction (SIGHUP, NULL, &oldact);
1699 if (oldact.sa_handler != SIG_IGN)
1700 sigaction (SIGHUP, &newact, NULL);
1701 sigaction (SIGPIPE, NULL, &oldact);
1702 if (oldact.sa_handler != SIG_IGN)
1703 sigaction (SIGPIPE, &newact, NULL);
1704 sigaction (SIGTERM, NULL, &oldact);
1705 if (oldact.sa_handler != SIG_IGN)
1706 sigaction (SIGTERM, &newact, NULL);
1707 #else /* !SA_INTERRUPT */
1708 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1709 signal (SIGINT, sighandler);
1710 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1711 signal (SIGHUP, sighandler);
1712 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1713 signal (SIGPIPE, sighandler);
1714 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1715 signal (SIGTERM, sighandler);
1716 #endif /* !SA_INTERRUPT */
1718 gkey.sword = gkey.eword = -1;
1719 gkey.ignore = NULL;
1720 gkey.translate = NULL;
1721 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1722 gkey.skipsblanks = gkey.skipeblanks = 0;
1724 files = (char **) xmalloc (sizeof (char *) * argc);
1726 for (i = 1; i < argc; ++i)
1728 if (argv[i][0] == '+')
1730 if (key)
1731 insertkey (key);
1732 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1733 key->eword = -1;
1734 key->ignore = NULL;
1735 key->translate = NULL;
1736 key->skipsblanks = key->skipeblanks = 0;
1737 key->numeric = key->general_numeric = key->month = key->reverse = 0;
1738 s = argv[i] + 1;
1739 if (! (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])])))
1740 badfieldspec (argv[i]);
1741 for (t = 0; digits[UCHAR (*s)]; ++s)
1742 t = 10 * t + *s - '0';
1743 t2 = 0;
1744 if (*s == '.')
1745 for (++s; digits[UCHAR (*s)]; ++s)
1746 t2 = 10 * t2 + *s - '0';
1747 if (t2 || t)
1749 key->sword = t;
1750 key->schar = t2;
1752 else
1753 key->sword = -1;
1754 s = set_ordering (s, key, bl_start);
1755 if (*s)
1756 badfieldspec (argv[i]);
1758 else if (argv[i][0] == '-' && argv[i][1])
1760 s = argv[i] + 1;
1761 if (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])]))
1763 if (!key)
1764 usage (2);
1765 for (t = 0; digits[UCHAR (*s)]; ++s)
1766 t = t * 10 + *s - '0';
1767 t2 = 0;
1768 if (*s == '.')
1769 for (++s; digits[UCHAR (*s)]; ++s)
1770 t2 = t2 * 10 + *s - '0';
1771 key->eword = t;
1772 key->echar = t2;
1773 s = set_ordering (s, key, bl_end);
1774 if (*s)
1775 badfieldspec (argv[i]);
1776 insertkey (key);
1777 key = NULL;
1779 else
1780 while (*s)
1782 s = set_ordering (s, &gkey, bl_both);
1783 switch (*s)
1785 case '\0':
1786 break;
1787 case 'c':
1788 checkonly = 1;
1789 break;
1790 case 'k':
1791 if (s[1])
1792 ++s;
1793 else
1795 if (i == argc - 1)
1796 error (2, 0, _("option `-k' requires an argument"));
1797 else
1798 s = argv[++i];
1800 if (key)
1801 insertkey (key);
1802 key = (struct keyfield *)
1803 xmalloc (sizeof (struct keyfield));
1804 key->eword = -1;
1805 key->ignore = NULL;
1806 key->translate = NULL;
1807 key->skipsblanks = key->skipeblanks = 0;
1808 key->numeric = key->month = key->reverse = 0;
1809 /* Get POS1. */
1810 if (!digits[UCHAR (*s)])
1811 badfieldspec (argv[i]);
1812 for (t = 0; digits[UCHAR (*s)]; ++s)
1813 t = 10 * t + *s - '0';
1814 if (t == 0)
1816 /* Provoke with `sort -k0' */
1817 error (0, 0, _("the starting field number argument \
1818 to the `-k' option must be positive"));
1819 badfieldspec (argv[i]);
1821 --t;
1822 t2 = 0;
1823 if (*s == '.')
1825 if (!digits[UCHAR (s[1])])
1827 /* Provoke with `sort -k1.' */
1828 error (0, 0, _("starting field spec has `.' but \
1829 lacks following character offset"));
1830 badfieldspec (argv[i]);
1832 for (++s; digits[UCHAR (*s)]; ++s)
1833 t2 = 10 * t2 + *s - '0';
1834 if (t2 == 0)
1836 /* Provoke with `sort -k1.0' */
1837 error (0, 0, _("starting field character offset \
1838 argument to the `-k' option\nmust be positive"));
1839 badfieldspec (argv[i]);
1841 --t2;
1843 if (t2 || t)
1845 key->sword = t;
1846 key->schar = t2;
1848 else
1849 key->sword = -1;
1850 s = set_ordering (s, key, bl_start);
1851 if (*s == 0)
1853 key->eword = -1;
1854 key->echar = 0;
1856 else if (*s != ',')
1857 badfieldspec (argv[i]);
1858 else if (*s == ',')
1860 /* Skip over comma. */
1861 ++s;
1862 if (*s == 0)
1864 /* Provoke with `sort -k1,' */
1865 error (0, 0, _("field specification has `,' but \
1866 lacks following field spec"));
1867 badfieldspec (argv[i]);
1869 /* Get POS2. */
1870 for (t = 0; digits[UCHAR (*s)]; ++s)
1871 t = t * 10 + *s - '0';
1872 if (t == 0)
1874 /* Provoke with `sort -k1,0' */
1875 error (0, 0, _("ending field number argument \
1876 to the `-k' option must be positive"));
1877 badfieldspec (argv[i]);
1879 --t;
1880 t2 = 0;
1881 if (*s == '.')
1883 if (!digits[UCHAR (s[1])])
1885 /* Provoke with `sort -k1,1.' */
1886 error (0, 0, _("ending field spec has `.' \
1887 but lacks following character offset"));
1888 badfieldspec (argv[i]);
1890 for (++s; digits[UCHAR (*s)]; ++s)
1891 t2 = t2 * 10 + *s - '0';
1893 else
1895 /* `-k 2,3' is equivalent to `+1 -3'. */
1896 ++t;
1898 key->eword = t;
1899 key->echar = t2;
1900 s = set_ordering (s, key, bl_end);
1901 if (*s)
1902 badfieldspec (argv[i]);
1904 insertkey (key);
1905 key = NULL;
1906 goto outer;
1907 case 'm':
1908 mergeonly = 1;
1909 break;
1910 case 'o':
1911 if (s[1])
1912 outfile = s + 1;
1913 else
1915 if (i == argc - 1)
1916 error (2, 0, _("option `-o' requires an argument"));
1917 else
1918 outfile = argv[++i];
1920 goto outer;
1921 case 's':
1922 stable = 1;
1923 break;
1924 case 't':
1925 if (s[1])
1926 tab = *++s;
1927 else if (i < argc - 1)
1929 tab = *argv[++i];
1930 goto outer;
1932 else
1933 error (2, 0, _("option `-t' requires an argument"));
1934 break;
1935 case 'T':
1936 if (s[1])
1937 temp_file_prefix = ++s;
1938 else
1940 if (i < argc - 1)
1941 temp_file_prefix = argv[++i];
1942 else
1943 error (2, 0, _("option `-T' requires an argument"));
1945 goto outer;
1946 /* break; */
1947 case 'u':
1948 unique = 1;
1949 break;
1950 case 'z':
1951 eolchar = 0;
1952 break;
1953 case 'y':
1954 /* Accept and ignore e.g. -y0 for compatibility with
1955 Solaris 2. */
1956 goto outer;
1957 default:
1958 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
1959 argv[0], *s);
1960 usage (2);
1962 if (*s)
1963 ++s;
1966 else /* Not an option. */
1968 files[nfiles++] = argv[i];
1970 outer:;
1973 if (key)
1974 insertkey (key);
1976 /* Inheritance of global options to individual keys. */
1977 for (key = keyhead.next; key; key = key->next)
1978 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
1979 && !key->skipeblanks && !key->month && !key->numeric
1980 && !key->general_numeric)
1982 key->ignore = gkey.ignore;
1983 key->translate = gkey.translate;
1984 key->skipsblanks = gkey.skipsblanks;
1985 key->skipeblanks = gkey.skipeblanks;
1986 key->month = gkey.month;
1987 key->numeric = gkey.numeric;
1988 key->general_numeric = gkey.general_numeric;
1989 key->reverse = gkey.reverse;
1992 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
1993 || gkey.skipeblanks || gkey.month || gkey.numeric
1994 || gkey.general_numeric))
1995 insertkey (&gkey);
1996 reverse = gkey.reverse;
1998 if (nfiles == 0)
2000 nfiles = 1;
2001 files = &minus;
2004 if (checkonly)
2005 exit (check (files, nfiles) != 0);
2007 if (strcmp (outfile, "-"))
2009 struct stat outstat;
2010 if (stat (outfile, &outstat) == 0)
2012 /* The following code prevents a race condition when
2013 people use the brain dead shell programming idiom:
2014 cat file | sort -o file
2015 This feature is provided for historical compatibility,
2016 but we strongly discourage ever relying on this in
2017 new shell programs. */
2019 /* Temporarily copy each input file that might be another name
2020 for the output file. When in doubt (e.g. a pipe), copy. */
2021 for (i = 0; i < nfiles; ++i)
2023 char buf[8192];
2024 FILE *fp;
2025 int cc;
2027 if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
2029 struct stat instat;
2030 if ((strcmp (files[i], "-")
2031 ? stat (files[i], &instat)
2032 : fstat (fileno (stdin), &instat)) != 0)
2034 error (0, errno, "%s", files[i]);
2035 cleanup ();
2036 exit (2);
2038 if (S_ISREG (instat.st_mode)
2039 && (instat.st_ino != outstat.st_ino
2040 || instat.st_dev != outstat.st_dev))
2042 /* We know the files are distinct. */
2043 continue;
2047 fp = xfopen (files[i], "r");
2048 tmp = tempname ();
2049 ofp = xtmpfopen (tmp);
2050 while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
2051 write_bytes (buf, cc, ofp);
2052 if (ferror (fp))
2054 error (0, errno, "%s", files[i]);
2055 cleanup ();
2056 exit (2);
2058 xfclose (ofp);
2059 xfclose (fp);
2060 files[i] = tmp;
2063 ofp = xfopen (outfile, "w");
2065 else
2066 ofp = stdout;
2068 if (mergeonly)
2069 merge (files, nfiles, ofp);
2070 else
2071 sort (files, nfiles, ofp);
2072 cleanup ();
2074 /* If we wait for the implicit flush on exit, and the parent process
2075 has closed stdout (e.g., exec >&- in a shell), then the output file
2076 winds up empty. I don't understand why. This is under SunOS,
2077 Solaris, Ultrix, and Irix. This premature fflush makes the output
2078 reappear. --karl@cs.umb.edu */
2079 if (fflush (ofp) < 0)
2080 error (1, errno, _("%s: write error"), outfile);
2082 if (have_read_stdin && fclose (stdin) == EOF)
2083 error (1, errno, outfile);
2084 if (ferror (stdout) || fclose (stdout) == EOF)
2085 error (1, errno, _("%s: write error"), outfile);
2087 exit (0);