(main): Give a better diagnostic for `sort -0'. Reported by Karl Berry.
[coreutils.git] / src / sort.c
blob224e7d34b4ff8f93e0fa82bea145d05f5ec8a75b
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 91, 92, 93, 94, 95, 1996 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
22 #include <config.h>
24 /* Get isblank from GNU libc. */
25 #define _GNU_SOURCE
27 #include <sys/types.h>
28 #include <signal.h>
29 #include <stdio.h>
30 #define NDEBUG 1
31 #include <assert.h>
32 #include "system.h"
33 #include "long-options.h"
34 #include "error.h"
35 #include "xstrtod.h"
37 #ifdef HAVE_LIMITS_H
38 #include <limits.h>
39 #else
40 #ifndef UCHAR_MAX
41 #define UCHAR_MAX 255
42 #endif
43 #endif
44 #ifndef STDC_HEADERS
45 char *malloc ();
46 char *realloc ();
47 void free ();
48 #endif
50 /* Undefine, to avoid warning about redefinition on some systems. */
51 #undef min
52 #define min(a, b) ((a) < (b) ? (a) : (b))
54 #define UCHAR_LIM (UCHAR_MAX + 1)
55 #define UCHAR(c) ((unsigned char) (c))
57 #ifndef DEFAULT_TMPDIR
58 #define DEFAULT_TMPDIR "/tmp"
59 #endif
61 /* Use this as exit status in case of error, not EXIT_FAILURE. This
62 is necessary because EXIT_FAILURE is usually 1 and POSIX requires
63 that sort exit with status 1 IFF invoked with -c and the input is
64 not properly sorted. Any other irregular exit must exit with a
65 status code greater than 1. */
66 #define SORT_FAILURE 2
68 /* The kind of blanks for '-b' to skip in various options. */
69 enum blanktype { bl_start, bl_end, bl_both };
71 /* The character marking end of line. Default to \n. */
72 int eolchar = '\n';
74 /* Lines are held in core as counted strings. */
75 struct line
77 char *text; /* Text of the line. */
78 int length; /* Length not including final newline. */
79 char *keybeg; /* Start of first key. */
80 char *keylim; /* Limit of first key. */
83 /* Arrays of lines. */
84 struct lines
86 struct line *lines; /* Dynamically allocated array of lines. */
87 int used; /* Number of slots used. */
88 int alloc; /* Number of slots allocated. */
89 int limit; /* Max number of slots to allocate. */
92 /* Input buffers. */
93 struct buffer
95 char *buf; /* Dynamically allocated buffer. */
96 int used; /* Number of bytes used. */
97 int alloc; /* Number of bytes allocated. */
98 int left; /* Number of bytes left after line parsing. */
101 struct keyfield
103 int sword; /* Zero-origin 'word' to start at. */
104 int schar; /* Additional characters to skip. */
105 int skipsblanks; /* Skip leading white space at start. */
106 int eword; /* Zero-origin first word after field. */
107 int echar; /* Additional characters in field. */
108 int skipeblanks; /* Skip trailing white space at finish. */
109 int *ignore; /* Boolean array of characters to ignore. */
110 char *translate; /* Translation applied to characters. */
111 int numeric; /* Flag for numeric comparison. Handle
112 strings of digits with optional decimal
113 point, but no exponential notation. */
114 int general_numeric; /* Flag for general, numeric comparison.
115 Handle numbers in exponential notation. */
116 int month; /* Flag for comparison by month name. */
117 int reverse; /* Reverse the sense of comparison. */
118 struct keyfield *next; /* Next keyfield to try. */
121 struct month
123 char *name;
124 int val;
127 /* The name this program was run with. */
128 char *program_name;
130 /* Table of digits. */
131 static int digits[UCHAR_LIM];
133 /* Table of white space. */
134 static int blanks[UCHAR_LIM];
136 /* Table of non-printing characters. */
137 static int nonprinting[UCHAR_LIM];
139 /* Table of non-dictionary characters (not letters, digits, or blanks). */
140 static int nondictionary[UCHAR_LIM];
142 /* Translation table folding lower case to upper. */
143 static char fold_toupper[UCHAR_LIM];
145 /* Table mapping 3-letter month names to integers.
146 Alphabetic order allows binary search. */
147 static struct month const monthtab[] =
149 {"APR", 4},
150 {"AUG", 8},
151 {"DEC", 12},
152 {"FEB", 2},
153 {"JAN", 1},
154 {"JUL", 7},
155 {"JUN", 6},
156 {"MAR", 3},
157 {"MAY", 5},
158 {"NOV", 11},
159 {"OCT", 10},
160 {"SEP", 9}
163 /* During the merge phase, the number of files to merge at once. */
164 #define NMERGE 16
166 /* Initial buffer size for in core sorting. Will not grow unless a
167 line longer than this is seen. */
168 static int sortalloc = 512 * 1024;
170 /* Initial buffer size for in core merge buffers. Bear in mind that
171 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
172 static int mergealloc = 16 * 1024;
174 /* Guess of average line length. */
175 static int linelength = 30;
177 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
178 #define LINEALLOC (256 * 1024)
180 /* Prefix for temporary file names. */
181 static char *temp_file_prefix;
183 /* Flag to reverse the order of all comparisons. */
184 static int reverse;
186 /* Flag for stable sort. This turns off the last ditch bytewise
187 comparison of lines, and instead leaves lines in the same order
188 they were read if all keys compare equal. */
189 static int stable;
191 /* Tab character separating fields. If NUL, then fields are separated
192 by the empty string between a non-whitespace character and a whitespace
193 character. */
194 static char tab;
196 /* Flag to remove consecutive duplicate lines from the output.
197 Only the last of a sequence of equal lines will be output. */
198 static int unique;
200 /* Nonzero if any of the input files are the standard input. */
201 static int have_read_stdin;
203 /* Lists of key field comparisons to be tried. */
204 static struct keyfield keyhead;
206 static void
207 usage (int status)
209 if (status != 0)
210 fprintf (stderr, _("Try `%s --help' for more information.\n"),
211 program_name);
212 else
214 printf (_("\
215 Usage: %s [OPTION]... [FILE]...\n\
217 program_name);
218 printf (_("\
219 Write sorted concatenation of all FILE(s) to standard output.\n\
221 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
222 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
223 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
224 -b ignore leading blanks in sort fields or keys\n\
225 -c check if given files already sorted, do not sort\n\
226 -d consider only [a-zA-Z0-9 ] characters in keys\n\
227 -f fold lower case to upper case characters in keys\n\
228 -g compare according to general numerical value, imply -b\n\
229 -i consider only [\\040-\\0176] characters in keys\n\
230 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
231 -m merge already sorted files, do not sort\n\
232 -n compare according to string numerical value, imply -b\n\
233 -o FILE write result on FILE instead of standard output\n\
234 -r reverse the result of comparisons\n\
235 -s stabilize sort by disabling last resort comparison\n\
236 -t SEP use SEParator instead of non- to whitespace transition\n\
237 -u with -c, check for strict ordering\n\
238 -u with -m, only output the first of an equal sequence\n\
239 -z end lines with 0 byte, not newline, for find -print0\n\
240 --help display this help and exit\n\
241 --version output version information and exit\n\
243 POS is F[.C][OPTS], where F is the field number and C the character\n\
244 position in the field, both counted from zero. OPTS is made up of one\n\
245 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
246 for that key. If no key given, use the entire line as key. With no\n\
247 FILE, or when FILE is -, read standard input.\n\
249 , DEFAULT_TMPDIR);
251 /* Don't use EXIT_FAILURE here in case it is defined to be 1.
252 POSIX requires that sort return 1 IFF invoked with -c and
253 the input is not properly sorted. */
254 assert (status == 0 || status == SORT_FAILURE);
255 exit (status);
258 /* The list of temporary files. */
259 static struct tempnode
261 char *name;
262 struct tempnode *next;
263 } temphead;
265 /* Clean up any remaining temporary files. */
267 static void
268 cleanup (void)
270 struct tempnode *node;
272 for (node = temphead.next; node; node = node->next)
273 unlink (node->name);
276 /* Allocate N bytes of memory dynamically, with error checking. */
278 static char *
279 xmalloc (unsigned int n)
281 char *p;
283 p = malloc (n);
284 if (p == 0)
286 error (0, 0, _("virtual memory exhausted"));
287 cleanup ();
288 exit (SORT_FAILURE);
290 return p;
293 /* Change the size of an allocated block of memory P to N bytes,
294 with error checking.
295 If P is NULL, run xmalloc.
296 If N is 0, run free and return NULL. */
298 static char *
299 xrealloc (char *p, unsigned int n)
301 if (p == 0)
302 return xmalloc (n);
303 if (n == 0)
305 free (p);
306 return 0;
308 p = realloc (p, n);
309 if (p == 0)
311 error (0, 0, _("virtual memory exhausted"));
312 cleanup ();
313 exit (SORT_FAILURE);
315 return p;
318 static FILE *
319 xtmpfopen (const char *file)
321 FILE *fp;
322 int fd;
324 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
325 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
327 error (0, errno, "%s", file);
328 cleanup ();
329 exit (SORT_FAILURE);
332 return fp;
335 static FILE *
336 xfopen (const char *file, const char *how)
338 FILE *fp;
340 if (strcmp (file, "-") == 0)
342 fp = stdin;
344 else
346 if ((fp = fopen (file, how)) == NULL)
348 error (0, errno, "%s", file);
349 cleanup ();
350 exit (SORT_FAILURE);
354 if (fp == stdin)
355 have_read_stdin = 1;
356 return fp;
359 static void
360 xfclose (FILE *fp)
362 if (fp == stdin)
364 /* Allow reading stdin from tty more than once. */
365 if (feof (fp))
366 clearerr (fp);
368 else if (fp == stdout)
370 if (fflush (fp) != 0)
372 error (0, errno, _("flushing file"));
373 cleanup ();
374 exit (SORT_FAILURE);
377 else
379 if (fclose (fp) != 0)
381 error (0, errno, _("error closing file"));
382 cleanup ();
383 exit (SORT_FAILURE);
388 static void
389 write_bytes (const char *buf, size_t n_bytes, FILE *fp)
391 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
393 error (0, errno, _("write error"));
394 cleanup ();
395 exit (SORT_FAILURE);
399 /* Return a name for a temporary file. */
401 static char *
402 tempname (void)
404 static unsigned int seq;
405 int len = strlen (temp_file_prefix);
406 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
407 struct tempnode *node;
409 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
410 sprintf (name,
411 "%s%ssort%5.5d%5.5d",
412 temp_file_prefix,
413 (len && temp_file_prefix[len - 1] != '/') ? "/" : "",
414 (unsigned int) getpid () & 0xffff, seq);
416 /* Make sure that SEQ's value fits in 5 digits. */
417 ++seq;
418 if (seq >= 100000)
419 seq = 0;
421 node->name = name;
422 node->next = temphead.next;
423 temphead.next = node;
424 return name;
427 /* Search through the list of temporary files for NAME;
428 remove it if it is found on the list. */
430 static void
431 zaptemp (char *name)
433 struct tempnode *node, *temp;
435 for (node = &temphead; node->next; node = node->next)
436 if (!strcmp (name, node->next->name))
437 break;
438 if (node->next)
440 temp = node->next;
441 unlink (temp->name);
442 free (temp->name);
443 node->next = temp->next;
444 free ((char *) temp);
448 /* Initialize the character class tables. */
450 static void
451 inittables (void)
453 int i;
455 for (i = 0; i < UCHAR_LIM; ++i)
457 if (ISBLANK (i))
458 blanks[i] = 1;
459 if (ISDIGIT (i))
460 digits[i] = 1;
461 if (!ISPRINT (i))
462 nonprinting[i] = 1;
463 if (!ISALNUM (i) && !ISBLANK (i))
464 nondictionary[i] = 1;
465 if (ISLOWER (i))
466 fold_toupper[i] = toupper (i);
467 else
468 fold_toupper[i] = i;
472 /* Initialize BUF, allocating ALLOC bytes initially. */
474 static void
475 initbuf (struct buffer *buf, int alloc)
477 buf->alloc = alloc;
478 buf->buf = xmalloc (buf->alloc);
479 buf->used = buf->left = 0;
482 /* Fill BUF reading from FP, moving buf->left bytes from the end
483 of buf->buf to the beginning first. If EOF is reached and the
484 file wasn't terminated by a newline, supply one. Return a count
485 of bytes buffered. */
487 static int
488 fillbuf (struct buffer *buf, FILE *fp)
490 int cc;
492 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
493 buf->used = buf->left;
495 while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, eolchar, buf->used)))
497 if (buf->used == buf->alloc)
499 buf->alloc *= 2;
500 buf->buf = xrealloc (buf->buf, buf->alloc);
502 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
503 if (ferror (fp))
505 error (0, errno, _("read error"));
506 cleanup ();
507 exit (SORT_FAILURE);
509 buf->used += cc;
512 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
514 if (buf->used == buf->alloc)
516 buf->alloc *= 2;
517 buf->buf = xrealloc (buf->buf, buf->alloc);
519 buf->buf[buf->used++] = eolchar;
522 return buf->used;
525 /* Initialize LINES, allocating space for ALLOC lines initially.
526 LIMIT is the maximum possible number of lines to allocate space
527 for, ever. */
529 static void
530 initlines (struct lines *lines, int alloc, int limit)
532 lines->alloc = alloc;
533 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
534 lines->used = 0;
535 lines->limit = limit;
538 /* Return a pointer to the first character of the field specified
539 by KEY in LINE. */
541 static char *
542 begfield (const struct line *line, const struct keyfield *key)
544 register char *ptr = line->text, *lim = ptr + line->length;
545 register int sword = key->sword, schar = key->schar;
547 if (tab)
548 while (ptr < lim && sword--)
550 while (ptr < lim && *ptr != tab)
551 ++ptr;
552 if (ptr < lim)
553 ++ptr;
555 else
556 while (ptr < lim && sword--)
558 while (ptr < lim && blanks[UCHAR (*ptr)])
559 ++ptr;
560 while (ptr < lim && !blanks[UCHAR (*ptr)])
561 ++ptr;
564 if (key->skipsblanks)
565 while (ptr < lim && blanks[UCHAR (*ptr)])
566 ++ptr;
568 if (ptr + schar <= lim)
569 ptr += schar;
570 else
571 ptr = lim;
573 return ptr;
576 /* Return the limit of (a pointer to the first character after) the field
577 in LINE specified by KEY. */
579 static char *
580 limfield (const struct line *line, const struct keyfield *key)
582 register char *ptr = line->text, *lim = ptr + line->length;
583 register int eword = key->eword, echar = key->echar;
585 /* Note: from the POSIX spec:
586 The leading field separator itself is included in
587 a field when -t is not used. FIXME: move this comment up... */
589 /* Move PTR past EWORD fields or to one past the last byte on LINE,
590 whichever comes first. If there are more than EWORD fields, leave
591 PTR pointing at the beginning of the field having zero-based index,
592 EWORD. If a delimiter character was specified (via -t), then that
593 `beginning' is the first character following the delimiting TAB.
594 Otherwise, leave PTR pointing at the first `blank' character after
595 the preceding field. */
596 if (tab)
597 while (ptr < lim && eword--)
599 while (ptr < lim && *ptr != tab)
600 ++ptr;
601 if (ptr < lim && (eword || echar > 0))
602 ++ptr;
604 else
605 while (ptr < lim && eword--)
607 while (ptr < lim && blanks[UCHAR (*ptr)])
608 ++ptr;
609 while (ptr < lim && !blanks[UCHAR (*ptr)])
610 ++ptr;
613 /* Make LIM point to the end of (one byte past) the current field. */
614 if (tab)
616 char *newlim;
617 newlim = memchr (ptr, tab, lim - ptr);
618 if (newlim)
619 lim = newlim;
621 else
623 char *newlim;
624 newlim = ptr;
625 while (newlim < lim && blanks[UCHAR (*newlim)])
626 ++newlim;
627 while (newlim < lim && !blanks[UCHAR (*newlim)])
628 ++newlim;
629 lim = newlim;
632 /* If we're skipping leading blanks, don't start counting characters
633 until after skipping past any leading blanks. */
634 if (key->skipsblanks)
635 while (ptr < lim && blanks[UCHAR (*ptr)])
636 ++ptr;
638 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
639 if (ptr + echar <= lim)
640 ptr += echar;
641 else
642 ptr = lim;
644 return ptr;
647 /* FIXME */
649 void
650 trim_trailing_blanks (const char *a_start, char **a_end)
652 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
653 --(*a_end);
656 /* Find the lines in BUF, storing pointers and lengths in LINES.
657 Also replace newlines in BUF with NULs. */
659 static void
660 findlines (struct buffer *buf, struct lines *lines)
662 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
663 struct keyfield *key = keyhead.next;
665 lines->used = 0;
667 while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))
668 && lines->used < lines->limit)
670 /* There are various places in the code that rely on a NUL
671 being at the end of in-core lines; NULs inside the lines
672 will not cause trouble, though. */
673 *ptr = '\0';
675 if (lines->used == lines->alloc)
677 lines->alloc *= 2;
678 lines->lines = (struct line *)
679 xrealloc ((char *) lines->lines,
680 lines->alloc * sizeof (struct line));
683 lines->lines[lines->used].text = beg;
684 lines->lines[lines->used].length = ptr - beg;
686 /* Precompute the position of the first key for efficiency. */
687 if (key)
689 if (key->eword >= 0)
690 lines->lines[lines->used].keylim =
691 limfield (&lines->lines[lines->used], key);
692 else
693 lines->lines[lines->used].keylim = ptr;
695 if (key->sword >= 0)
696 lines->lines[lines->used].keybeg =
697 begfield (&lines->lines[lines->used], key);
698 else
700 if (key->skipsblanks)
701 while (blanks[UCHAR (*beg)])
702 ++beg;
703 lines->lines[lines->used].keybeg = beg;
705 if (key->skipeblanks)
707 trim_trailing_blanks (lines->lines[lines->used].keybeg,
708 &lines->lines[lines->used].keylim);
711 else
713 lines->lines[lines->used].keybeg = 0;
714 lines->lines[lines->used].keylim = 0;
717 ++lines->used;
718 beg = ptr + 1;
721 buf->left = lim - beg;
724 /* Compare strings A and B containing decimal fractions < 1. Each string
725 should begin with a decimal point followed immediately by the digits
726 of the fraction. Strings not of this form are considered to be zero. */
728 static int
729 fraccompare (register const char *a, register const char *b)
731 register tmpa = UCHAR (*a), tmpb = UCHAR (*b);
733 if (tmpa == '.' && tmpb == '.')
736 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
737 while (tmpa == tmpb && digits[tmpa]);
738 if (digits[tmpa] && digits[tmpb])
739 return tmpa - tmpb;
740 if (digits[tmpa])
742 while (tmpa == '0')
743 tmpa = UCHAR (*++a);
744 if (digits[tmpa])
745 return 1;
746 return 0;
748 if (digits[tmpb])
750 while (tmpb == '0')
751 tmpb = UCHAR (*++b);
752 if (digits[tmpb])
753 return -1;
754 return 0;
756 return 0;
758 else if (tmpa == '.')
761 tmpa = UCHAR (*++a);
762 while (tmpa == '0');
763 if (digits[tmpa])
764 return 1;
765 return 0;
767 else if (tmpb == '.')
770 tmpb = UCHAR (*++b);
771 while (tmpb == '0');
772 if (digits[tmpb])
773 return -1;
774 return 0;
776 return 0;
779 /* Compare strings A and B as numbers without explicitly converting them to
780 machine numbers. Comparatively slow for short strings, but asymptotically
781 hideously fast. */
783 static int
784 numcompare (register const char *a, register const char *b)
786 register int tmpa, tmpb, loga, logb, tmp;
788 tmpa = UCHAR (*a);
789 tmpb = UCHAR (*b);
791 while (blanks[tmpa])
792 tmpa = UCHAR (*++a);
793 while (blanks[tmpb])
794 tmpb = UCHAR (*++b);
796 if (tmpa == '-')
799 tmpa = UCHAR (*++a);
800 while (tmpa == '0');
801 if (tmpb != '-')
803 if (tmpa == '.')
805 tmpa = UCHAR (*++a);
806 while (tmpa == '0');
807 if (digits[tmpa])
808 return -1;
809 while (tmpb == '0')
810 tmpb = UCHAR (*++b);
811 if (tmpb == '.')
813 tmpb = *++b;
814 while (tmpb == '0');
815 if (digits[tmpb])
816 return -1;
817 return 0;
820 tmpb = UCHAR (*++b);
821 while (tmpb == '0');
823 while (tmpa == tmpb && digits[tmpa])
824 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
826 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
827 return -fraccompare (a, b);
829 if (digits[tmpa])
830 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
832 else
833 loga = 0;
835 if (digits[tmpb])
836 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
838 else
839 logb = 0;
841 if ((tmp = logb - loga) != 0)
842 return tmp;
844 if (!loga)
845 return 0;
847 return tmpb - tmpa;
849 else if (tmpb == '-')
852 tmpb = UCHAR (*++b);
853 while (tmpb == '0');
854 if (tmpb == '.')
856 tmpb = *++b;
857 while (tmpb == '0');
858 if (digits[tmpb])
859 return 1;
860 while (tmpa == '0')
861 tmpa = UCHAR (*++a);
862 if (tmpa == '.')
864 tmpa = UCHAR (*++a);
865 while (tmpa == '0');
866 if (digits[tmpa])
867 return 1;
868 return 0;
870 else
872 while (tmpa == '0')
873 tmpa = UCHAR (*++a);
874 while (tmpb == '0')
875 tmpb = UCHAR (*++b);
877 while (tmpa == tmpb && digits[tmpa])
878 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
880 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
881 return fraccompare (a, b);
883 if (digits[tmpa])
884 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
886 else
887 loga = 0;
889 if (digits[tmpb])
890 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
892 else
893 logb = 0;
895 if ((tmp = loga - logb) != 0)
896 return tmp;
898 if (!loga)
899 return 0;
901 return tmpa - tmpb;
905 static int
906 general_numcompare (const char *sa, const char *sb)
908 double a, b;
909 /* FIXME: add option to warn about failed conversions. */
910 /* FIXME: maybe add option to try expensive FP conversion
911 only if A and B can't be compared more cheaply/accurately. */
912 if (xstrtod (sa, NULL, &a))
914 a = 0;
916 if (xstrtod (sb, NULL, &b))
918 b = 0;
920 return a == b ? 0 : a < b ? -1 : 1;
923 /* Return an integer <= 12 associated with month name S with length LEN,
924 0 if the name in S is not recognized. */
926 static int
927 getmonth (const char *s, int len)
929 char month[4];
930 register int i, lo = 0, hi = 12;
932 while (len > 0 && blanks[UCHAR(*s)])
933 ++s, --len;
935 if (len < 3)
936 return 0;
938 for (i = 0; i < 3; ++i)
939 month[i] = fold_toupper[UCHAR (s[i])];
940 month[3] = '\0';
942 while (hi - lo > 1)
943 if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
944 hi = (lo + hi) / 2;
945 else
946 lo = (lo + hi) / 2;
947 if (!strcmp (month, monthtab[lo].name))
948 return monthtab[lo].val;
949 return 0;
952 /* Compare two lines A and B trying every key in sequence until there
953 are no more keys or a difference is found. */
955 static int
956 keycompare (const struct line *a, const struct line *b)
958 register char *texta, *textb, *lima, *limb, *translate;
959 register int *ignore;
960 struct keyfield *key;
961 int diff = 0, iter = 0, lena, lenb;
963 for (key = keyhead.next; key; key = key->next, ++iter)
965 ignore = key->ignore;
966 translate = key->translate;
968 /* Find the beginning and limit of each field. */
969 if (iter || a->keybeg == NULL || b->keybeg == NULL)
971 if (key->eword >= 0)
972 lima = limfield (a, key), limb = limfield (b, key);
973 else
974 lima = a->text + a->length, limb = b->text + b->length;
976 if (key->sword >= 0)
977 texta = begfield (a, key), textb = begfield (b, key);
978 else
980 texta = a->text, textb = b->text;
981 if (key->skipsblanks)
983 while (texta < lima && blanks[UCHAR (*texta)])
984 ++texta;
985 while (textb < limb && blanks[UCHAR (*textb)])
986 ++textb;
990 else
992 /* For the first iteration only, the key positions have
993 been precomputed for us. */
994 texta = a->keybeg, lima = a->keylim;
995 textb = b->keybeg, limb = b->keylim;
998 /* Find the lengths. */
999 lena = lima - texta, lenb = limb - textb;
1000 if (lena < 0)
1001 lena = 0;
1002 if (lenb < 0)
1003 lenb = 0;
1005 if (key->skipeblanks)
1007 char *a_end = texta + lena;
1008 char *b_end = textb + lenb;
1009 trim_trailing_blanks (texta, &a_end);
1010 trim_trailing_blanks (textb, &b_end);
1011 lena = a_end - texta;
1012 lenb = b_end - textb;
1015 /* Actually compare the fields. */
1016 if (key->numeric)
1018 if (*lima || *limb)
1020 char savea = *lima, saveb = *limb;
1022 *lima = *limb = '\0';
1023 diff = numcompare (texta, textb);
1024 *lima = savea, *limb = saveb;
1026 else
1027 diff = numcompare (texta, textb);
1029 if (diff)
1030 return key->reverse ? -diff : diff;
1031 continue;
1033 else if (key->general_numeric)
1035 if (*lima || *limb)
1037 char savea = *lima, saveb = *limb;
1039 *lima = *limb = '\0';
1040 diff = general_numcompare (texta, textb);
1041 *lima = savea, *limb = saveb;
1043 else
1044 diff = general_numcompare (texta, textb);
1046 if (diff)
1047 return key->reverse ? -diff : diff;
1048 continue;
1050 else if (key->month)
1052 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1053 if (diff)
1054 return key->reverse ? -diff : diff;
1055 continue;
1057 else if (ignore && translate)
1059 #define CMP_WITH_IGNORE(A, B) \
1060 do \
1062 while (texta < lima && textb < limb) \
1064 while (texta < lima && ignore[UCHAR (*texta)]) \
1065 ++texta; \
1066 while (textb < limb && ignore[UCHAR (*textb)]) \
1067 ++textb; \
1068 if (texta < lima && textb < limb) \
1070 if ((A) != (B)) \
1072 diff = (A) - (B); \
1073 break; \
1075 ++texta; \
1076 ++textb; \
1079 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1080 diff = -1; \
1081 else if (texta < lima && textb == limb \
1082 && !ignore[UCHAR (*texta)]) \
1083 diff = 1; \
1086 if (diff == 0) \
1088 while (texta < lima && ignore[UCHAR (*texta)]) \
1089 ++texta; \
1090 while (textb < limb && ignore[UCHAR (*textb)]) \
1091 ++textb; \
1093 if (texta == lima && textb < limb) \
1094 diff = -1; \
1095 else if (texta < lima && textb == limb) \
1096 diff = 1; \
1098 /* Relative lengths are meaningless if characters were ignored. \
1099 Handling this case here avoids what might be an invalid length \
1100 comparison below. */ \
1101 if (diff == 0 && texta == lima && textb == limb) \
1102 return 0; \
1104 while (0)
1106 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1107 else if (ignore)
1108 CMP_WITH_IGNORE (*texta, *textb);
1109 else if (translate)
1110 while (texta < lima && textb < limb)
1112 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1114 diff = (translate[UCHAR (*--texta)]
1115 - translate[UCHAR (*--textb)]);
1116 break;
1119 else
1120 diff = memcmp (texta, textb, min (lena, lenb));
1122 if (diff)
1123 return key->reverse ? -diff : diff;
1124 if ((diff = lena - lenb) != 0)
1125 return key->reverse ? -diff : diff;
1128 return 0;
1131 /* Compare two lines A and B, returning negative, zero, or positive
1132 depending on whether A compares less than, equal to, or greater than B. */
1134 static int
1135 compare (register const struct line *a, register const struct line *b)
1137 int diff, tmpa, tmpb, mini;
1139 /* First try to compare on the specified keys (if any).
1140 The only two cases with no key at all are unadorned sort,
1141 and unadorned sort -r. */
1142 if (keyhead.next)
1144 diff = keycompare (a, b);
1145 if (diff != 0)
1146 return diff;
1147 if (unique || stable)
1148 return 0;
1151 /* If the keys all compare equal (or no keys were specified)
1152 fall through to the default byte-by-byte comparison. */
1153 tmpa = a->length, tmpb = b->length;
1154 mini = min (tmpa, tmpb);
1155 if (mini == 0)
1156 diff = tmpa - tmpb;
1157 else
1159 char *ap = a->text, *bp = b->text;
1161 diff = UCHAR (*ap) - UCHAR (*bp);
1162 if (diff == 0)
1164 diff = memcmp (ap, bp, mini);
1165 if (diff == 0)
1166 diff = tmpa - tmpb;
1170 return reverse ? -diff : diff;
1173 /* Check that the lines read from the given FP come in order. Return
1174 1 if they do and 0 if there is a disorder.
1175 FIXME: return number of first out-of-order line if not sorted. */
1177 static int
1178 checkfp (FILE *fp)
1180 struct buffer buf; /* Input buffer. */
1181 struct lines lines; /* Lines scanned from the buffer. */
1182 struct line temp; /* Copy of previous line. */
1183 int cc; /* Character count. */
1184 int alloc, sorted = 1;
1186 initbuf (&buf, mergealloc);
1187 initlines (&lines, mergealloc / linelength + 1,
1188 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1189 alloc = linelength;
1190 temp.text = xmalloc (alloc);
1192 cc = fillbuf (&buf, fp);
1193 if (cc == 0)
1194 goto finish;
1196 findlines (&buf, &lines);
1198 while (1)
1200 struct line *prev_line; /* Pointer to previous line. */
1201 int cmp; /* Result of calling compare. */
1202 int i;
1204 /* Compare each line in the buffer with its successor. */
1205 for (i = 0; i < lines.used - 1; ++i)
1207 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1208 if ((unique && cmp >= 0) || (cmp > 0))
1210 sorted = 0;
1211 goto finish;
1215 /* Save the last line of the buffer and refill the buffer. */
1216 prev_line = lines.lines + (lines.used - 1);
1217 if (prev_line->length > alloc)
1219 while (prev_line->length + 1 > alloc)
1220 alloc *= 2;
1221 temp.text = xrealloc (temp.text, alloc);
1223 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1224 temp.length = prev_line->length;
1225 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1226 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1228 cc = fillbuf (&buf, fp);
1229 if (cc == 0)
1230 break;
1232 findlines (&buf, &lines);
1233 /* Make sure the line saved from the old buffer contents is
1234 less than or equal to the first line of the new buffer. */
1235 cmp = compare (&temp, &lines.lines[0]);
1236 if ((unique && cmp >= 0) || (cmp > 0))
1238 sorted = 0;
1239 break;
1243 finish:
1244 xfclose (fp);
1245 free (buf.buf);
1246 free ((char *) lines.lines);
1247 free (temp.text);
1248 return sorted;
1251 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1252 Close FPS before returning. */
1254 static void
1255 mergefps (FILE **fps, register int nfps, FILE *ofp)
1257 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1258 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1259 struct line saved; /* Saved line for unique check. */
1260 int savedflag = 0; /* True if there is a saved line. */
1261 int savealloc; /* Size allocated for the saved line. */
1262 int cur[NMERGE]; /* Current line in each line table. */
1263 int ord[NMERGE]; /* Table representing a permutation of fps,
1264 such that lines[ord[0]].lines[cur[ord[0]]]
1265 is the smallest line and will be next
1266 output. */
1267 register int i, j, t;
1269 #ifdef lint /* Suppress `used before initialized' warning. */
1270 savealloc = 0;
1271 #endif
1273 /* Allocate space for a saved line if necessary. */
1274 if (unique)
1276 savealloc = linelength;
1277 saved.text = xmalloc (savealloc);
1280 /* Read initial lines from each input file. */
1281 for (i = 0; i < nfps; ++i)
1283 initbuf (&buffer[i], mergealloc);
1284 /* If a file is empty, eliminate it from future consideration. */
1285 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1287 xfclose (fps[i]);
1288 --nfps;
1289 for (j = i; j < nfps; ++j)
1290 fps[j] = fps[j + 1];
1292 if (i == nfps)
1293 free (buffer[i].buf);
1294 else
1296 initlines (&lines[i], mergealloc / linelength + 1,
1297 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1298 findlines (&buffer[i], &lines[i]);
1299 cur[i] = 0;
1303 /* Set up the ord table according to comparisons among input lines.
1304 Since this only reorders two items if one is strictly greater than
1305 the other, it is stable. */
1306 for (i = 0; i < nfps; ++i)
1307 ord[i] = i;
1308 for (i = 1; i < nfps; ++i)
1309 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1310 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1311 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1313 /* Repeatedly output the smallest line until no input remains. */
1314 while (nfps)
1316 /* If uniqified output is turned on, output only the first of
1317 an identical series of lines. */
1318 if (unique)
1320 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1322 write_bytes (saved.text, saved.length, ofp);
1323 putc (eolchar, ofp);
1324 savedflag = 0;
1326 if (!savedflag)
1328 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1330 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1331 savealloc *= 2;
1332 saved.text = xrealloc (saved.text, savealloc);
1334 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1335 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1336 saved.length + 1);
1337 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1339 saved.keybeg = saved.text +
1340 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1341 - lines[ord[0]].lines[cur[ord[0]]].text);
1343 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1345 saved.keylim = saved.text +
1346 (lines[ord[0]].lines[cur[ord[0]]].keylim
1347 - lines[ord[0]].lines[cur[ord[0]]].text);
1349 savedflag = 1;
1352 else
1354 write_bytes (lines[ord[0]].lines[cur[ord[0]]].text,
1355 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1356 putc (eolchar, ofp);
1359 /* Check if we need to read more lines into core. */
1360 if (++cur[ord[0]] == lines[ord[0]].used)
1361 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1363 findlines (&buffer[ord[0]], &lines[ord[0]]);
1364 cur[ord[0]] = 0;
1366 else
1368 /* We reached EOF on fps[ord[0]]. */
1369 for (i = 1; i < nfps; ++i)
1370 if (ord[i] > ord[0])
1371 --ord[i];
1372 --nfps;
1373 xfclose (fps[ord[0]]);
1374 free (buffer[ord[0]].buf);
1375 free ((char *) lines[ord[0]].lines);
1376 for (i = ord[0]; i < nfps; ++i)
1378 fps[i] = fps[i + 1];
1379 buffer[i] = buffer[i + 1];
1380 lines[i] = lines[i + 1];
1381 cur[i] = cur[i + 1];
1383 for (i = 0; i < nfps; ++i)
1384 ord[i] = ord[i + 1];
1385 continue;
1388 /* The new line just read in may be larger than other lines
1389 already in core; push it back in the queue until we encounter
1390 a line larger than it. */
1391 for (i = 1; i < nfps; ++i)
1393 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1394 &lines[ord[i]].lines[cur[ord[i]]]);
1395 if (!t)
1396 t = ord[0] - ord[i];
1397 if (t < 0)
1398 break;
1400 t = ord[0];
1401 for (j = 1; j < i; ++j)
1402 ord[j - 1] = ord[j];
1403 ord[i - 1] = t;
1406 if (unique && savedflag)
1408 write_bytes (saved.text, saved.length, ofp);
1409 putc (eolchar, ofp);
1410 free (saved.text);
1414 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1416 static void
1417 sortlines (struct line *lines, int nlines, struct line *temp)
1419 register struct line *lo, *hi, *t;
1420 register int nlo, nhi;
1422 if (nlines == 2)
1424 if (compare (&lines[0], &lines[1]) > 0)
1425 *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1426 return;
1429 nlo = nlines / 2;
1430 lo = lines;
1431 nhi = nlines - nlo;
1432 hi = lines + nlo;
1434 if (nlo > 1)
1435 sortlines (lo, nlo, temp);
1437 if (nhi > 1)
1438 sortlines (hi, nhi, temp);
1440 t = temp;
1442 while (nlo && nhi)
1443 if (compare (lo, hi) <= 0)
1444 *t++ = *lo++, --nlo;
1445 else
1446 *t++ = *hi++, --nhi;
1447 while (nlo--)
1448 *t++ = *lo++;
1450 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1451 *lo++ = *t++;
1454 /* Check that each of the NFILES FILES is ordered.
1455 Return a count of disordered files. */
1457 static int
1458 check (char **files, int nfiles)
1460 int i, disorders = 0;
1461 FILE *fp;
1463 for (i = 0; i < nfiles; ++i)
1465 fp = xfopen (files[i], "r");
1466 if (!checkfp (fp))
1468 fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]);
1469 ++disorders;
1472 return disorders;
1475 /* Merge NFILES FILES onto OFP. */
1477 static void
1478 merge (char **files, int nfiles, FILE *ofp)
1480 int i, j, t;
1481 char *temp;
1482 FILE *fps[NMERGE], *tfp;
1484 while (nfiles > NMERGE)
1486 t = 0;
1487 for (i = 0; i < nfiles / NMERGE; ++i)
1489 for (j = 0; j < NMERGE; ++j)
1490 fps[j] = xfopen (files[i * NMERGE + j], "r");
1491 tfp = xtmpfopen (temp = tempname ());
1492 mergefps (fps, NMERGE, tfp);
1493 xfclose (tfp);
1494 for (j = 0; j < NMERGE; ++j)
1495 zaptemp (files[i * NMERGE + j]);
1496 files[t++] = temp;
1498 for (j = 0; j < nfiles % NMERGE; ++j)
1499 fps[j] = xfopen (files[i * NMERGE + j], "r");
1500 tfp = xtmpfopen (temp = tempname ());
1501 mergefps (fps, nfiles % NMERGE, tfp);
1502 xfclose (tfp);
1503 for (j = 0; j < nfiles % NMERGE; ++j)
1504 zaptemp (files[i * NMERGE + j]);
1505 files[t++] = temp;
1506 nfiles = t;
1509 for (i = 0; i < nfiles; ++i)
1510 fps[i] = xfopen (files[i], "r");
1511 mergefps (fps, i, ofp);
1512 for (i = 0; i < nfiles; ++i)
1513 zaptemp (files[i]);
1516 /* Sort NFILES FILES onto OFP. */
1518 static void
1519 sort (char **files, int nfiles, FILE *ofp)
1521 struct buffer buf;
1522 struct lines lines;
1523 struct line *tmp;
1524 int i, ntmp;
1525 FILE *fp, *tfp;
1526 struct tempnode *node;
1527 int n_temp_files = 0;
1528 char **tempfiles;
1530 initbuf (&buf, sortalloc);
1531 initlines (&lines, sortalloc / linelength + 1,
1532 LINEALLOC / sizeof (struct line));
1533 ntmp = lines.alloc;
1534 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1536 while (nfiles--)
1538 fp = xfopen (*files++, "r");
1539 while (fillbuf (&buf, fp))
1541 findlines (&buf, &lines);
1542 if (lines.used > ntmp)
1544 while (lines.used > ntmp)
1545 ntmp *= 2;
1546 tmp = (struct line *)
1547 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1549 sortlines (lines.lines, lines.used, tmp);
1550 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1551 tfp = ofp;
1552 else
1554 ++n_temp_files;
1555 tfp = xtmpfopen (tempname ());
1557 for (i = 0; i < lines.used; ++i)
1558 if (!unique || i == 0
1559 || compare (&lines.lines[i], &lines.lines[i - 1]))
1561 write_bytes (lines.lines[i].text, lines.lines[i].length, tfp);
1562 putc (eolchar, tfp);
1564 if (tfp != ofp)
1565 xfclose (tfp);
1567 xfclose (fp);
1570 free (buf.buf);
1571 free ((char *) lines.lines);
1572 free ((char *) tmp);
1574 if (n_temp_files)
1576 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1577 i = n_temp_files;
1578 for (node = temphead.next; i > 0; node = node->next)
1579 tempfiles[--i] = node->name;
1580 merge (tempfiles, n_temp_files, ofp);
1581 free ((char *) tempfiles);
1585 /* Insert key KEY at the end of the list (`keyhead'). */
1587 static void
1588 insertkey (struct keyfield *key)
1590 struct keyfield *k = &keyhead;
1592 while (k->next)
1593 k = k->next;
1594 k->next = key;
1595 key->next = NULL;
1598 static void
1599 badfieldspec (const char *s)
1601 error (SORT_FAILURE, 0, _("invalid field specification `%s'"), s);
1604 /* Handle interrupts and hangups. */
1606 static void
1607 sighandler (int sig)
1609 #ifdef SA_INTERRUPT
1610 struct sigaction sigact;
1612 sigact.sa_handler = SIG_DFL;
1613 sigemptyset (&sigact.sa_mask);
1614 sigact.sa_flags = 0;
1615 sigaction (sig, &sigact, NULL);
1616 #else /* !SA_INTERRUPT */
1617 signal (sig, SIG_DFL);
1618 #endif /* SA_INTERRUPT */
1619 cleanup ();
1620 kill (getpid (), sig);
1623 /* Set the ordering options for KEY specified in S.
1624 Return the address of the first character in S that
1625 is not a valid ordering option.
1626 BLANKTYPE is the kind of blanks that 'b' should skip. */
1628 static char *
1629 set_ordering (register const char *s, struct keyfield *key,
1630 enum blanktype blanktype)
1632 while (*s)
1634 switch (*s)
1636 case 'b':
1637 if (blanktype == bl_start || blanktype == bl_both)
1638 key->skipsblanks = 1;
1639 if (blanktype == bl_end || blanktype == bl_both)
1640 key->skipeblanks = 1;
1641 break;
1642 case 'd':
1643 key->ignore = nondictionary;
1644 break;
1645 case 'f':
1646 key->translate = fold_toupper;
1647 break;
1648 case 'g':
1649 key->general_numeric = 1;
1650 break;
1651 case 'i':
1652 key->ignore = nonprinting;
1653 break;
1654 case 'M':
1655 key->month = 1;
1656 break;
1657 case 'n':
1658 key->numeric = 1;
1659 if (blanktype == bl_start || blanktype == bl_both)
1660 key->skipsblanks = 1;
1661 if (blanktype == bl_end || blanktype == bl_both)
1662 key->skipeblanks = 1;
1663 break;
1664 case 'r':
1665 key->reverse = 1;
1666 break;
1667 default:
1668 return (char *) s;
1670 ++s;
1672 return (char *) s;
1676 main (int argc, char **argv)
1678 struct keyfield *key = NULL, gkey;
1679 char *s;
1680 int i, t, t2;
1681 int checkonly = 0, mergeonly = 0, nfiles = 0;
1682 char *minus = "-", *outfile = minus, **files, *tmp;
1683 FILE *ofp;
1684 #ifdef SA_INTERRUPT
1685 struct sigaction oldact, newact;
1686 #endif /* SA_INTERRUPT */
1688 program_name = argv[0];
1689 setlocale (LC_ALL, "");
1690 bindtextdomain (PACKAGE, LOCALEDIR);
1691 textdomain (PACKAGE);
1693 parse_long_options (argc, argv, "sort", PACKAGE_VERSION, usage);
1695 have_read_stdin = 0;
1696 inittables ();
1698 temp_file_prefix = getenv ("TMPDIR");
1699 if (temp_file_prefix == NULL)
1700 temp_file_prefix = DEFAULT_TMPDIR;
1702 #ifdef SA_INTERRUPT
1703 newact.sa_handler = sighandler;
1704 sigemptyset (&newact.sa_mask);
1705 newact.sa_flags = 0;
1707 sigaction (SIGINT, NULL, &oldact);
1708 if (oldact.sa_handler != SIG_IGN)
1709 sigaction (SIGINT, &newact, NULL);
1710 sigaction (SIGHUP, NULL, &oldact);
1711 if (oldact.sa_handler != SIG_IGN)
1712 sigaction (SIGHUP, &newact, NULL);
1713 sigaction (SIGPIPE, NULL, &oldact);
1714 if (oldact.sa_handler != SIG_IGN)
1715 sigaction (SIGPIPE, &newact, NULL);
1716 sigaction (SIGTERM, NULL, &oldact);
1717 if (oldact.sa_handler != SIG_IGN)
1718 sigaction (SIGTERM, &newact, NULL);
1719 #else /* !SA_INTERRUPT */
1720 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1721 signal (SIGINT, sighandler);
1722 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1723 signal (SIGHUP, sighandler);
1724 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1725 signal (SIGPIPE, sighandler);
1726 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1727 signal (SIGTERM, sighandler);
1728 #endif /* !SA_INTERRUPT */
1730 gkey.sword = gkey.eword = -1;
1731 gkey.ignore = NULL;
1732 gkey.translate = NULL;
1733 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1734 gkey.skipsblanks = gkey.skipeblanks = 0;
1736 files = (char **) xmalloc (sizeof (char *) * argc);
1738 for (i = 1; i < argc; ++i)
1740 if (argv[i][0] == '+')
1742 if (key)
1743 insertkey (key);
1744 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1745 key->eword = -1;
1746 key->ignore = NULL;
1747 key->translate = NULL;
1748 key->skipsblanks = key->skipeblanks = 0;
1749 key->numeric = key->general_numeric = key->month = key->reverse = 0;
1750 s = argv[i] + 1;
1751 if (! (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])])))
1752 badfieldspec (argv[i]);
1753 for (t = 0; digits[UCHAR (*s)]; ++s)
1754 t = 10 * t + *s - '0';
1755 t2 = 0;
1756 if (*s == '.')
1757 for (++s; digits[UCHAR (*s)]; ++s)
1758 t2 = 10 * t2 + *s - '0';
1759 if (t2 || t)
1761 key->sword = t;
1762 key->schar = t2;
1764 else
1765 key->sword = -1;
1766 s = set_ordering (s, key, bl_start);
1767 if (*s)
1768 badfieldspec (argv[i]);
1770 else if (argv[i][0] == '-' && argv[i][1])
1772 s = argv[i] + 1;
1773 if (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])]))
1775 if (!key)
1777 /* Provoke with `sort -9'. */
1778 error (0, 0, _("when using the old-style +POS and -POS \
1779 key specifiers,\nthe +POS specifier must come first"));
1780 usage (SORT_FAILURE);
1782 for (t = 0; digits[UCHAR (*s)]; ++s)
1783 t = t * 10 + *s - '0';
1784 t2 = 0;
1785 if (*s == '.')
1786 for (++s; digits[UCHAR (*s)]; ++s)
1787 t2 = t2 * 10 + *s - '0';
1788 key->eword = t;
1789 key->echar = t2;
1790 s = set_ordering (s, key, bl_end);
1791 if (*s)
1792 badfieldspec (argv[i]);
1793 insertkey (key);
1794 key = NULL;
1796 else
1797 while (*s)
1799 s = set_ordering (s, &gkey, bl_both);
1800 switch (*s)
1802 case '\0':
1803 break;
1804 case 'c':
1805 checkonly = 1;
1806 break;
1807 case 'k':
1808 if (s[1])
1809 ++s;
1810 else
1812 if (i == argc - 1)
1813 error (SORT_FAILURE, 0,
1814 _("option `-k' requires an argument"));
1815 else
1816 s = argv[++i];
1818 if (key)
1819 insertkey (key);
1820 key = (struct keyfield *)
1821 xmalloc (sizeof (struct keyfield));
1822 key->eword = -1;
1823 key->ignore = NULL;
1824 key->translate = NULL;
1825 key->skipsblanks = key->skipeblanks = 0;
1826 key->numeric = key->month = key->reverse = 0;
1827 /* Get POS1. */
1828 if (!digits[UCHAR (*s)])
1829 badfieldspec (argv[i]);
1830 for (t = 0; digits[UCHAR (*s)]; ++s)
1831 t = 10 * t + *s - '0';
1832 if (t == 0)
1834 /* Provoke with `sort -k0' */
1835 error (0, 0, _("the starting field number argument \
1836 to the `-k' option must be positive"));
1837 badfieldspec (argv[i]);
1839 --t;
1840 t2 = 0;
1841 if (*s == '.')
1843 if (!digits[UCHAR (s[1])])
1845 /* Provoke with `sort -k1.' */
1846 error (0, 0, _("starting field spec has `.' but \
1847 lacks following character offset"));
1848 badfieldspec (argv[i]);
1850 for (++s; digits[UCHAR (*s)]; ++s)
1851 t2 = 10 * t2 + *s - '0';
1852 if (t2 == 0)
1854 /* Provoke with `sort -k1.0' */
1855 error (0, 0, _("starting field character offset \
1856 argument to the `-k' option\nmust be positive"));
1857 badfieldspec (argv[i]);
1859 --t2;
1861 if (t2 || t)
1863 key->sword = t;
1864 key->schar = t2;
1866 else
1867 key->sword = -1;
1868 s = set_ordering (s, key, bl_start);
1869 if (*s == 0)
1871 key->eword = -1;
1872 key->echar = 0;
1874 else if (*s != ',')
1875 badfieldspec (argv[i]);
1876 else if (*s == ',')
1878 /* Skip over comma. */
1879 ++s;
1880 if (*s == 0)
1882 /* Provoke with `sort -k1,' */
1883 error (0, 0, _("field specification has `,' but \
1884 lacks following field spec"));
1885 badfieldspec (argv[i]);
1887 /* Get POS2. */
1888 for (t = 0; digits[UCHAR (*s)]; ++s)
1889 t = t * 10 + *s - '0';
1890 if (t == 0)
1892 /* Provoke with `sort -k1,0' */
1893 error (0, 0, _("ending field number argument \
1894 to the `-k' option must be positive"));
1895 badfieldspec (argv[i]);
1897 --t;
1898 t2 = 0;
1899 if (*s == '.')
1901 if (!digits[UCHAR (s[1])])
1903 /* Provoke with `sort -k1,1.' */
1904 error (0, 0, _("ending field spec has `.' \
1905 but lacks following character offset"));
1906 badfieldspec (argv[i]);
1908 for (++s; digits[UCHAR (*s)]; ++s)
1909 t2 = t2 * 10 + *s - '0';
1911 else
1913 /* `-k 2,3' is equivalent to `+1 -3'. */
1914 ++t;
1916 key->eword = t;
1917 key->echar = t2;
1918 s = set_ordering (s, key, bl_end);
1919 if (*s)
1920 badfieldspec (argv[i]);
1922 insertkey (key);
1923 key = NULL;
1924 goto outer;
1925 case 'm':
1926 mergeonly = 1;
1927 break;
1928 case 'o':
1929 if (s[1])
1930 outfile = s + 1;
1931 else
1933 if (i == argc - 1)
1934 error (SORT_FAILURE, 0,
1935 _("option `-o' requires an argument"));
1936 else
1937 outfile = argv[++i];
1939 goto outer;
1940 case 's':
1941 stable = 1;
1942 break;
1943 case 't':
1944 if (s[1])
1945 tab = *++s;
1946 else if (i < argc - 1)
1948 tab = *argv[++i];
1949 goto outer;
1951 else
1952 error (SORT_FAILURE, 0,
1953 _("option `-t' requires an argument"));
1954 break;
1955 case 'T':
1956 if (s[1])
1957 temp_file_prefix = ++s;
1958 else
1960 if (i < argc - 1)
1961 temp_file_prefix = argv[++i];
1962 else
1963 error (SORT_FAILURE, 0,
1964 _("option `-T' requires an argument"));
1966 goto outer;
1967 /* break; */
1968 case 'u':
1969 unique = 1;
1970 break;
1971 case 'z':
1972 eolchar = 0;
1973 break;
1974 case 'y':
1975 /* Accept and ignore e.g. -y0 for compatibility with
1976 Solaris 2. */
1977 goto outer;
1978 default:
1979 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
1980 argv[0], *s);
1981 usage (SORT_FAILURE);
1983 if (*s)
1984 ++s;
1987 else /* Not an option. */
1989 files[nfiles++] = argv[i];
1991 outer:;
1994 if (key)
1995 insertkey (key);
1997 /* Inheritance of global options to individual keys. */
1998 for (key = keyhead.next; key; key = key->next)
1999 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2000 && !key->skipeblanks && !key->month && !key->numeric
2001 && !key->general_numeric)
2003 key->ignore = gkey.ignore;
2004 key->translate = gkey.translate;
2005 key->skipsblanks = gkey.skipsblanks;
2006 key->skipeblanks = gkey.skipeblanks;
2007 key->month = gkey.month;
2008 key->numeric = gkey.numeric;
2009 key->general_numeric = gkey.general_numeric;
2010 key->reverse = gkey.reverse;
2013 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2014 || gkey.skipeblanks || gkey.month || gkey.numeric
2015 || gkey.general_numeric))
2016 insertkey (&gkey);
2017 reverse = gkey.reverse;
2019 if (nfiles == 0)
2021 nfiles = 1;
2022 files = &minus;
2025 if (checkonly)
2027 /* POSIX requires that sort return 1 IFF invoked with -c and the
2028 input is not properly sorted. */
2029 exit (check (files, nfiles) == 0 ? 0 : 1);
2032 if (strcmp (outfile, "-"))
2034 struct stat outstat;
2035 if (stat (outfile, &outstat) == 0)
2037 /* The following code prevents a race condition when
2038 people use the brain dead shell programming idiom:
2039 cat file | sort -o file
2040 This feature is provided for historical compatibility,
2041 but we strongly discourage ever relying on this in
2042 new shell programs. */
2044 /* Temporarily copy each input file that might be another name
2045 for the output file. When in doubt (e.g. a pipe), copy. */
2046 for (i = 0; i < nfiles; ++i)
2048 char buf[8192];
2049 FILE *fp;
2050 int cc;
2052 if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
2054 struct stat instat;
2055 if ((strcmp (files[i], "-")
2056 ? stat (files[i], &instat)
2057 : fstat (fileno (stdin), &instat)) != 0)
2059 error (0, errno, "%s", files[i]);
2060 cleanup ();
2061 exit (SORT_FAILURE);
2063 if (S_ISREG (instat.st_mode)
2064 && (instat.st_ino != outstat.st_ino
2065 || instat.st_dev != outstat.st_dev))
2067 /* We know the files are distinct. */
2068 continue;
2072 fp = xfopen (files[i], "r");
2073 tmp = tempname ();
2074 ofp = xtmpfopen (tmp);
2075 while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
2076 write_bytes (buf, cc, ofp);
2077 if (ferror (fp))
2079 error (0, errno, "%s", files[i]);
2080 cleanup ();
2081 exit (SORT_FAILURE);
2083 xfclose (ofp);
2084 xfclose (fp);
2085 files[i] = tmp;
2088 ofp = xfopen (outfile, "w");
2090 else
2091 ofp = stdout;
2093 if (mergeonly)
2094 merge (files, nfiles, ofp);
2095 else
2096 sort (files, nfiles, ofp);
2097 cleanup ();
2099 /* If we wait for the implicit flush on exit, and the parent process
2100 has closed stdout (e.g., exec >&- in a shell), then the output file
2101 winds up empty. I don't understand why. This is under SunOS,
2102 Solaris, Ultrix, and Irix. This premature fflush makes the output
2103 reappear. --karl@cs.umb.edu */
2104 if (fflush (ofp) < 0)
2105 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2107 if (have_read_stdin && fclose (stdin) == EOF)
2108 error (SORT_FAILURE, errno, outfile);
2109 if (ferror (stdout) || fclose (stdout) == EOF)
2110 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2112 exit (EXIT_SUCCESS);