.
[coreutils.git] / src / sort.c
blob0b116876463a4ec8462200dc3fd8c103fc3f6270
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 91, 92, 93, 94, 95, 1996 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
22 #include <config.h>
24 /* Get isblank from GNU libc. */
25 #define _GNU_SOURCE
27 #include <sys/types.h>
28 #include <signal.h>
29 #include <stdio.h>
30 #ifndef ENABLE_ASSERTIONS
31 # define NDEBUG 1
32 #endif
33 #include <assert.h>
34 #include "system.h"
35 #include "long-options.h"
36 #include "error.h"
37 #include "xstrtod.h"
39 #ifdef HAVE_LIMITS_H
40 #include <limits.h>
41 #else
42 #ifndef UCHAR_MAX
43 #define UCHAR_MAX 255
44 #endif
45 #endif
46 #ifndef STDC_HEADERS
47 char *malloc ();
48 char *realloc ();
49 void free ();
50 #endif
52 /* Undefine, to avoid warning about redefinition on some systems. */
53 #undef min
54 #define min(a, b) ((a) < (b) ? (a) : (b))
56 #define UCHAR_LIM (UCHAR_MAX + 1)
57 #define UCHAR(c) ((unsigned char) (c))
59 #ifndef DEFAULT_TMPDIR
60 #define DEFAULT_TMPDIR "/tmp"
61 #endif
63 /* Use this as exit status in case of error, not EXIT_FAILURE. This
64 is necessary because EXIT_FAILURE is usually 1 and POSIX requires
65 that sort exit with status 1 IFF invoked with -c and the input is
66 not properly sorted. Any other irregular exit must exit with a
67 status code greater than 1. */
68 #define SORT_FAILURE 2
70 /* The kind of blanks for '-b' to skip in various options. */
71 enum blanktype { bl_start, bl_end, bl_both };
73 /* The character marking end of line. Default to \n. */
74 int eolchar = '\n';
76 /* Lines are held in core as counted strings. */
77 struct line
79 char *text; /* Text of the line. */
80 int length; /* Length not including final newline. */
81 char *keybeg; /* Start of first key. */
82 char *keylim; /* Limit of first key. */
85 /* Arrays of lines. */
86 struct lines
88 struct line *lines; /* Dynamically allocated array of lines. */
89 int used; /* Number of slots used. */
90 int alloc; /* Number of slots allocated. */
91 int limit; /* Max number of slots to allocate. */
94 /* Input buffers. */
95 struct buffer
97 char *buf; /* Dynamically allocated buffer. */
98 int used; /* Number of bytes used. */
99 int alloc; /* Number of bytes allocated. */
100 int left; /* Number of bytes left after line parsing. */
103 struct keyfield
105 int sword; /* Zero-origin 'word' to start at. */
106 int schar; /* Additional characters to skip. */
107 int skipsblanks; /* Skip leading white space at start. */
108 int eword; /* Zero-origin first word after field. */
109 int echar; /* Additional characters in field. */
110 int skipeblanks; /* Skip trailing white space at finish. */
111 int *ignore; /* Boolean array of characters to ignore. */
112 char *translate; /* Translation applied to characters. */
113 int numeric; /* Flag for numeric comparison. Handle
114 strings of digits with optional decimal
115 point, but no exponential notation. */
116 int general_numeric; /* Flag for general, numeric comparison.
117 Handle numbers in exponential notation. */
118 int month; /* Flag for comparison by month name. */
119 int reverse; /* Reverse the sense of comparison. */
120 struct keyfield *next; /* Next keyfield to try. */
123 struct month
125 char *name;
126 int val;
129 /* The name this program was run with. */
130 char *program_name;
132 /* Table of white space. */
133 static int blanks[UCHAR_LIM];
135 /* Table of non-printing characters. */
136 static int nonprinting[UCHAR_LIM];
138 /* Table of non-dictionary characters (not letters, digits, or blanks). */
139 static int nondictionary[UCHAR_LIM];
141 /* Translation table folding lower case to upper. */
142 static char fold_toupper[UCHAR_LIM];
144 /* Table mapping 3-letter month names to integers.
145 Alphabetic order allows binary search. */
146 static struct month const monthtab[] =
148 {"APR", 4},
149 {"AUG", 8},
150 {"DEC", 12},
151 {"FEB", 2},
152 {"JAN", 1},
153 {"JUL", 7},
154 {"JUN", 6},
155 {"MAR", 3},
156 {"MAY", 5},
157 {"NOV", 11},
158 {"OCT", 10},
159 {"SEP", 9}
162 /* During the merge phase, the number of files to merge at once. */
163 #define NMERGE 16
165 /* Initial buffer size for in core sorting. Will not grow unless a
166 line longer than this is seen. */
167 static int sortalloc = 512 * 1024;
169 /* Initial buffer size for in core merge buffers. Bear in mind that
170 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
171 static int mergealloc = 16 * 1024;
173 /* Guess of average line length. */
174 static int linelength = 30;
176 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
177 #define LINEALLOC (256 * 1024)
179 /* Prefix for temporary file names. */
180 static char *temp_file_prefix;
182 /* Flag to reverse the order of all comparisons. */
183 static int reverse;
185 /* Flag for stable sort. This turns off the last ditch bytewise
186 comparison of lines, and instead leaves lines in the same order
187 they were read if all keys compare equal. */
188 static int stable;
190 /* Tab character separating fields. If NUL, then fields are separated
191 by the empty string between a non-whitespace character and a whitespace
192 character. */
193 static char tab;
195 /* Flag to remove consecutive duplicate lines from the output.
196 Only the last of a sequence of equal lines will be output. */
197 static int unique;
199 /* Nonzero if any of the input files are the standard input. */
200 static int have_read_stdin;
202 /* Lists of key field comparisons to be tried. */
203 static struct keyfield keyhead;
205 static void
206 usage (int status)
208 if (status != 0)
209 fprintf (stderr, _("Try `%s --help' for more information.\n"),
210 program_name);
211 else
213 printf (_("\
214 Usage: %s [OPTION]... [FILE]...\n\
216 program_name);
217 printf (_("\
218 Write sorted concatenation of all FILE(s) to standard output.\n\
220 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
221 -b ignore leading blanks in sort fields or keys\n\
222 -c check if given files already sorted, do not sort\n\
223 -d consider only [a-zA-Z0-9 ] characters in keys\n\
224 -f fold lower case to upper case characters in keys\n\
225 -g compare according to general numerical value, imply -b\n\
226 -i consider only [\\040-\\0176] characters in keys\n\
227 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
228 -m merge already sorted files, do not sort\n\
229 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
230 -n compare according to string numerical value, imply -b\n\
231 -o FILE write result on FILE instead of standard output\n\
232 -r reverse the result of comparisons\n\
233 -s stabilize sort by disabling last resort comparison\n\
234 -t SEP use SEParator instead of non- to whitespace transition\n\
235 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
236 -u with -c, check for strict ordering;\n\
237 with -m, only output the first of an equal sequence\n\
238 -z end lines with 0 byte, not newline, for find -print0\n\
239 --help display this help and exit\n\
240 --version output version information and exit\n\
242 POS is F[.C][OPTS], where F is the field number and C the character\n\
243 position in the field, both counted from zero. OPTS is made up of one\n\
244 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
245 for that key. If no key given, use the entire line as key. With no\n\
246 FILE, or when FILE is -, read standard input.\n\
248 , DEFAULT_TMPDIR);
249 puts (_("\nReport bugs to textutils-bugs@gnu.ai.mit.edu"));
251 /* Don't use EXIT_FAILURE here in case it is defined to be 1.
252 POSIX requires that sort return 1 IFF invoked with -c and
253 the input is not properly sorted. */
254 assert (status == 0 || status == SORT_FAILURE);
255 exit (status);
258 /* The list of temporary files. */
259 static struct tempnode
261 char *name;
262 struct tempnode *next;
263 } temphead;
265 /* Clean up any remaining temporary files. */
267 static void
268 cleanup (void)
270 struct tempnode *node;
272 for (node = temphead.next; node; node = node->next)
273 unlink (node->name);
276 /* Allocate N bytes of memory dynamically, with error checking. */
278 static char *
279 xmalloc (unsigned int n)
281 char *p;
283 p = malloc (n);
284 if (p == 0)
286 error (0, 0, _("virtual memory exhausted"));
287 cleanup ();
288 exit (SORT_FAILURE);
290 return p;
293 /* Change the size of an allocated block of memory P to N bytes,
294 with error checking.
295 If P is NULL, run xmalloc.
296 If N is 0, run free and return NULL. */
298 static char *
299 xrealloc (char *p, unsigned int n)
301 if (p == 0)
302 return xmalloc (n);
303 if (n == 0)
305 free (p);
306 return 0;
308 p = realloc (p, n);
309 if (p == 0)
311 error (0, 0, _("virtual memory exhausted"));
312 cleanup ();
313 exit (SORT_FAILURE);
315 return p;
318 static FILE *
319 xtmpfopen (const char *file)
321 FILE *fp;
322 int fd;
324 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
325 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
327 error (0, errno, "%s", file);
328 cleanup ();
329 exit (SORT_FAILURE);
332 return fp;
335 static FILE *
336 xfopen (const char *file, const char *how)
338 FILE *fp;
340 if (strcmp (file, "-") == 0)
342 fp = stdin;
344 else
346 if ((fp = fopen (file, how)) == NULL)
348 error (0, errno, "%s", file);
349 cleanup ();
350 exit (SORT_FAILURE);
354 if (fp == stdin)
355 have_read_stdin = 1;
356 return fp;
359 static void
360 xfclose (FILE *fp)
362 if (fp == stdin)
364 /* Allow reading stdin from tty more than once. */
365 if (feof (fp))
366 clearerr (fp);
368 else if (fp == stdout)
370 if (fflush (fp) != 0)
372 error (0, errno, _("flushing file"));
373 cleanup ();
374 exit (SORT_FAILURE);
377 else
379 if (fclose (fp) != 0)
381 error (0, errno, _("error closing file"));
382 cleanup ();
383 exit (SORT_FAILURE);
388 static void
389 write_bytes (const char *buf, size_t n_bytes, FILE *fp)
391 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
393 error (0, errno, _("write error"));
394 cleanup ();
395 exit (SORT_FAILURE);
399 /* Return a name for a temporary file. */
401 static char *
402 tempname (void)
404 static unsigned int seq;
405 int len = strlen (temp_file_prefix);
406 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
407 struct tempnode *node;
409 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
410 sprintf (name,
411 "%s%ssort%5.5d%5.5d",
412 temp_file_prefix,
413 (len && temp_file_prefix[len - 1] != '/') ? "/" : "",
414 (unsigned int) getpid () & 0xffff, seq);
416 /* Make sure that SEQ's value fits in 5 digits. */
417 ++seq;
418 if (seq >= 100000)
419 seq = 0;
421 node->name = name;
422 node->next = temphead.next;
423 temphead.next = node;
424 return name;
427 /* Search through the list of temporary files for NAME;
428 remove it if it is found on the list. */
430 static void
431 zaptemp (char *name)
433 struct tempnode *node, *temp;
435 for (node = &temphead; node->next; node = node->next)
436 if (!strcmp (name, node->next->name))
437 break;
438 if (node->next)
440 temp = node->next;
441 unlink (temp->name);
442 free (temp->name);
443 node->next = temp->next;
444 free ((char *) temp);
448 /* Initialize the character class tables. */
450 static void
451 inittables (void)
453 int i;
455 for (i = 0; i < UCHAR_LIM; ++i)
457 if (ISBLANK (i))
458 blanks[i] = 1;
459 if (!ISPRINT (i))
460 nonprinting[i] = 1;
461 if (!ISALNUM (i) && !ISBLANK (i))
462 nondictionary[i] = 1;
463 if (ISLOWER (i))
464 fold_toupper[i] = toupper (i);
465 else
466 fold_toupper[i] = i;
470 /* Initialize BUF, allocating ALLOC bytes initially. */
472 static void
473 initbuf (struct buffer *buf, int alloc)
475 buf->alloc = alloc;
476 buf->buf = xmalloc (buf->alloc);
477 buf->used = buf->left = 0;
480 /* Fill BUF reading from FP, moving buf->left bytes from the end
481 of buf->buf to the beginning first. If EOF is reached and the
482 file wasn't terminated by a newline, supply one. Return a count
483 of bytes buffered. */
485 static int
486 fillbuf (struct buffer *buf, FILE *fp)
488 int cc;
490 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
491 buf->used = buf->left;
493 while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, eolchar, buf->used)))
495 if (buf->used == buf->alloc)
497 buf->alloc *= 2;
498 buf->buf = xrealloc (buf->buf, buf->alloc);
500 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
501 if (ferror (fp))
503 error (0, errno, _("read error"));
504 cleanup ();
505 exit (SORT_FAILURE);
507 buf->used += cc;
510 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
512 if (buf->used == buf->alloc)
514 buf->alloc *= 2;
515 buf->buf = xrealloc (buf->buf, buf->alloc);
517 buf->buf[buf->used++] = eolchar;
520 return buf->used;
523 /* Initialize LINES, allocating space for ALLOC lines initially.
524 LIMIT is the maximum possible number of lines to allocate space
525 for, ever. */
527 static void
528 initlines (struct lines *lines, int alloc, int limit)
530 lines->alloc = alloc;
531 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
532 lines->used = 0;
533 lines->limit = limit;
536 /* Return a pointer to the first character of the field specified
537 by KEY in LINE. */
539 static char *
540 begfield (const struct line *line, const struct keyfield *key)
542 register char *ptr = line->text, *lim = ptr + line->length;
543 register int sword = key->sword, schar = key->schar;
545 if (tab)
546 while (ptr < lim && sword--)
548 while (ptr < lim && *ptr != tab)
549 ++ptr;
550 if (ptr < lim)
551 ++ptr;
553 else
554 while (ptr < lim && sword--)
556 while (ptr < lim && blanks[UCHAR (*ptr)])
557 ++ptr;
558 while (ptr < lim && !blanks[UCHAR (*ptr)])
559 ++ptr;
562 if (key->skipsblanks)
563 while (ptr < lim && blanks[UCHAR (*ptr)])
564 ++ptr;
566 if (ptr + schar <= lim)
567 ptr += schar;
568 else
569 ptr = lim;
571 return ptr;
574 /* Return the limit of (a pointer to the first character after) the field
575 in LINE specified by KEY. */
577 static char *
578 limfield (const struct line *line, const struct keyfield *key)
580 register char *ptr = line->text, *lim = ptr + line->length;
581 register int eword = key->eword, echar = key->echar;
583 /* Note: from the POSIX spec:
584 The leading field separator itself is included in
585 a field when -t is not used. FIXME: move this comment up... */
587 /* Move PTR past EWORD fields or to one past the last byte on LINE,
588 whichever comes first. If there are more than EWORD fields, leave
589 PTR pointing at the beginning of the field having zero-based index,
590 EWORD. If a delimiter character was specified (via -t), then that
591 `beginning' is the first character following the delimiting TAB.
592 Otherwise, leave PTR pointing at the first `blank' character after
593 the preceding field. */
594 if (tab)
595 while (ptr < lim && eword--)
597 while (ptr < lim && *ptr != tab)
598 ++ptr;
599 if (ptr < lim && (eword || echar > 0))
600 ++ptr;
602 else
603 while (ptr < lim && eword--)
605 while (ptr < lim && blanks[UCHAR (*ptr)])
606 ++ptr;
607 while (ptr < lim && !blanks[UCHAR (*ptr)])
608 ++ptr;
611 #ifdef POSIX_UNSPECIFIED
612 /* The following block of code makes GNU sort incompatible with
613 standard Unix sort, so it's ifdef'd out for now.
614 The POSIX spec isn't clear on how to interpret this.
615 FIXME: request clarification.
617 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
618 Date: Thu, 30 May 96 12:20:41 -0400
620 [...]I believe I've found another bug in `sort'.
622 $ cat /tmp/sort.in
623 a b c 2 d
624 pq rs 1 t
625 $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
626 a b c 2 d
627 pq rs 1 t
628 $ /bin/sort +0.6 -0.7 </tmp/sort.in
629 pq rs 1 t
630 a b c 2 d
632 Unix sort produced the answer I expected: sort on the single character
633 in column 6. GNU sort produced different results, because it disagrees
634 on the interpretation of the key-end spec "-M.N". Unix sort reads this
635 as "skip M fields, then N characters"; but GNU sort wants it to mean
636 "skip M fields, then either N characters or the rest of the current
637 field, whichever comes first". This extra clause applies only to
638 key-ends, not key-starts.
641 /* Make LIM point to the end of (one byte past) the current field. */
642 if (tab)
644 char *newlim;
645 newlim = memchr (ptr, tab, lim - ptr);
646 if (newlim)
647 lim = newlim;
649 else
651 char *newlim;
652 newlim = ptr;
653 while (newlim < lim && blanks[UCHAR (*newlim)])
654 ++newlim;
655 while (newlim < lim && !blanks[UCHAR (*newlim)])
656 ++newlim;
657 lim = newlim;
659 #endif
661 /* If we're skipping leading blanks, don't start counting characters
662 until after skipping past any leading blanks. */
663 if (key->skipsblanks)
664 while (ptr < lim && blanks[UCHAR (*ptr)])
665 ++ptr;
667 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
668 if (ptr + echar <= lim)
669 ptr += echar;
670 else
671 ptr = lim;
673 return ptr;
676 /* FIXME */
678 void
679 trim_trailing_blanks (const char *a_start, char **a_end)
681 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
682 --(*a_end);
685 /* Find the lines in BUF, storing pointers and lengths in LINES.
686 Also replace newlines in BUF with NULs. */
688 static void
689 findlines (struct buffer *buf, struct lines *lines)
691 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
692 struct keyfield *key = keyhead.next;
694 lines->used = 0;
696 while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))
697 && lines->used < lines->limit)
699 /* There are various places in the code that rely on a NUL
700 being at the end of in-core lines; NULs inside the lines
701 will not cause trouble, though. */
702 *ptr = '\0';
704 if (lines->used == lines->alloc)
706 lines->alloc *= 2;
707 lines->lines = (struct line *)
708 xrealloc ((char *) lines->lines,
709 lines->alloc * sizeof (struct line));
712 lines->lines[lines->used].text = beg;
713 lines->lines[lines->used].length = ptr - beg;
715 /* Precompute the position of the first key for efficiency. */
716 if (key)
718 if (key->eword >= 0)
719 lines->lines[lines->used].keylim =
720 limfield (&lines->lines[lines->used], key);
721 else
722 lines->lines[lines->used].keylim = ptr;
724 if (key->sword >= 0)
725 lines->lines[lines->used].keybeg =
726 begfield (&lines->lines[lines->used], key);
727 else
729 if (key->skipsblanks)
730 while (blanks[UCHAR (*beg)])
731 ++beg;
732 lines->lines[lines->used].keybeg = beg;
734 if (key->skipeblanks)
736 trim_trailing_blanks (lines->lines[lines->used].keybeg,
737 &lines->lines[lines->used].keylim);
740 else
742 lines->lines[lines->used].keybeg = 0;
743 lines->lines[lines->used].keylim = 0;
746 ++lines->used;
747 beg = ptr + 1;
750 buf->left = lim - beg;
753 /* Compare strings A and B containing decimal fractions < 1. Each string
754 should begin with a decimal point followed immediately by the digits
755 of the fraction. Strings not of this form are considered to be zero. */
757 static int
758 fraccompare (register const char *a, register const char *b)
760 register int tmpa = *a;
761 register int tmpb = *b;
763 if (tmpa == '.' && tmpb == '.')
766 tmpa = *++a, tmpb = *++b;
767 while (tmpa == tmpb && ISDIGIT (tmpa));
768 if (ISDIGIT (tmpa) && ISDIGIT (tmpb))
769 return tmpa - tmpb;
770 if (ISDIGIT (tmpa))
772 while (tmpa == '0')
773 tmpa = *++a;
774 if (ISDIGIT (tmpa))
775 return 1;
776 return 0;
778 if (ISDIGIT (tmpb))
780 while (tmpb == '0')
781 tmpb = *++b;
782 if (ISDIGIT (tmpb))
783 return -1;
784 return 0;
786 return 0;
788 else if (tmpa == '.')
791 tmpa = *++a;
792 while (tmpa == '0');
793 if (ISDIGIT (tmpa))
794 return 1;
795 return 0;
797 else if (tmpb == '.')
800 tmpb = *++b;
801 while (tmpb == '0');
802 if (ISDIGIT (tmpb))
803 return -1;
804 return 0;
806 return 0;
809 /* Compare strings A and B as numbers without explicitly converting them to
810 machine numbers. Comparatively slow for short strings, but asymptotically
811 hideously fast. */
813 static int
814 numcompare (register const char *a, register const char *b)
816 register int tmpa, tmpb, loga, logb, tmp;
818 tmpa = UCHAR (*a);
819 tmpb = UCHAR (*b);
821 while (blanks[tmpa])
822 tmpa = UCHAR (*++a);
823 while (blanks[tmpb])
824 tmpb = UCHAR (*++b);
826 if (tmpa == '-')
829 tmpa = *++a;
830 while (tmpa == '0');
831 if (tmpb != '-')
833 if (tmpa == '.')
835 tmpa = *++a;
836 while (tmpa == '0');
837 if (ISDIGIT (tmpa))
838 return -1;
839 while (tmpb == '0')
840 tmpb = *++b;
841 if (tmpb == '.')
843 tmpb = *++b;
844 while (tmpb == '0');
845 if (ISDIGIT (tmpb))
846 return -1;
847 return 0;
850 tmpb = *++b;
851 while (tmpb == '0');
853 while (tmpa == tmpb && ISDIGIT (tmpa))
854 tmpa = *++a, tmpb = *++b;
856 if ((tmpa == '.' && !ISDIGIT (tmpb))
857 || (tmpb == '.' && !ISDIGIT (tmpa)))
858 return -fraccompare (a, b);
860 if (ISDIGIT (tmpa))
861 for (loga = 1; ISDIGIT (*++a); ++loga)
863 else
864 loga = 0;
866 if (ISDIGIT (tmpb))
867 for (logb = 1; ISDIGIT (*++b); ++logb)
869 else
870 logb = 0;
872 if ((tmp = logb - loga) != 0)
873 return tmp;
875 if (!loga)
876 return 0;
878 return tmpb - tmpa;
880 else if (tmpb == '-')
883 tmpb = *++b;
884 while (tmpb == '0');
885 if (tmpb == '.')
887 tmpb = *++b;
888 while (tmpb == '0');
889 if (ISDIGIT (tmpb))
890 return 1;
891 while (tmpa == '0')
892 tmpa = *++a;
893 if (tmpa == '.')
895 tmpa = *++a;
896 while (tmpa == '0');
897 if (ISDIGIT (tmpa))
898 return 1;
899 return 0;
901 else
903 while (tmpa == '0')
904 tmpa = *++a;
905 while (tmpb == '0')
906 tmpb = *++b;
908 while (tmpa == tmpb && ISDIGIT (tmpa))
909 tmpa = *++a, tmpb = *++b;
911 if ((tmpa == '.' && !ISDIGIT (tmpb))
912 || (tmpb == '.' && !ISDIGIT (tmpa)))
913 return fraccompare (a, b);
915 if (ISDIGIT (tmpa))
916 for (loga = 1; ISDIGIT (*++a); ++loga)
918 else
919 loga = 0;
921 if (ISDIGIT (tmpb))
922 for (logb = 1; ISDIGIT (*++b); ++logb)
924 else
925 logb = 0;
927 if ((tmp = loga - logb) != 0)
928 return tmp;
930 if (!loga)
931 return 0;
933 return tmpa - tmpb;
937 static int
938 general_numcompare (const char *sa, const char *sb)
940 double a, b;
941 /* FIXME: add option to warn about failed conversions. */
942 /* FIXME: maybe add option to try expensive FP conversion
943 only if A and B can't be compared more cheaply/accurately. */
944 if (xstrtod (sa, NULL, &a))
946 a = 0;
948 if (xstrtod (sb, NULL, &b))
950 b = 0;
952 return a == b ? 0 : a < b ? -1 : 1;
955 /* Return an integer <= 12 associated with month name S with length LEN,
956 0 if the name in S is not recognized. */
958 static int
959 getmonth (const char *s, int len)
961 char month[4];
962 register int i, lo = 0, hi = 12;
964 while (len > 0 && blanks[UCHAR(*s)])
965 ++s, --len;
967 if (len < 3)
968 return 0;
970 for (i = 0; i < 3; ++i)
971 month[i] = fold_toupper[UCHAR (s[i])];
972 month[3] = '\0';
974 while (hi - lo > 1)
975 if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
976 hi = (lo + hi) / 2;
977 else
978 lo = (lo + hi) / 2;
979 if (!strcmp (month, monthtab[lo].name))
980 return monthtab[lo].val;
981 return 0;
984 /* Compare two lines A and B trying every key in sequence until there
985 are no more keys or a difference is found. */
987 static int
988 keycompare (const struct line *a, const struct line *b)
990 register char *texta, *textb, *lima, *limb;
991 register unsigned char *translate;
992 register int *ignore;
993 struct keyfield *key;
994 int diff = 0, iter = 0, lena, lenb;
996 for (key = keyhead.next; key; key = key->next, ++iter)
998 ignore = key->ignore;
999 translate = (unsigned char *) key->translate;
1001 /* Find the beginning and limit of each field. */
1002 if (iter || a->keybeg == NULL || b->keybeg == NULL)
1004 if (key->eword >= 0)
1005 lima = limfield (a, key), limb = limfield (b, key);
1006 else
1007 lima = a->text + a->length, limb = b->text + b->length;
1009 if (key->sword >= 0)
1010 texta = begfield (a, key), textb = begfield (b, key);
1011 else
1013 texta = a->text, textb = b->text;
1014 if (key->skipsblanks)
1016 while (texta < lima && blanks[UCHAR (*texta)])
1017 ++texta;
1018 while (textb < limb && blanks[UCHAR (*textb)])
1019 ++textb;
1023 else
1025 /* For the first iteration only, the key positions have
1026 been precomputed for us. */
1027 texta = a->keybeg, lima = a->keylim;
1028 textb = b->keybeg, limb = b->keylim;
1031 /* Find the lengths. */
1032 lena = lima - texta, lenb = limb - textb;
1033 if (lena < 0)
1034 lena = 0;
1035 if (lenb < 0)
1036 lenb = 0;
1038 if (key->skipeblanks)
1040 char *a_end = texta + lena;
1041 char *b_end = textb + lenb;
1042 trim_trailing_blanks (texta, &a_end);
1043 trim_trailing_blanks (textb, &b_end);
1044 lena = a_end - texta;
1045 lenb = b_end - textb;
1048 /* Actually compare the fields. */
1049 if (key->numeric)
1051 if (*lima || *limb)
1053 char savea = *lima, saveb = *limb;
1055 *lima = *limb = '\0';
1056 diff = numcompare (texta, textb);
1057 *lima = savea, *limb = saveb;
1059 else
1060 diff = numcompare (texta, textb);
1062 if (diff)
1063 return key->reverse ? -diff : diff;
1064 continue;
1066 else if (key->general_numeric)
1068 if (*lima || *limb)
1070 char savea = *lima, saveb = *limb;
1072 *lima = *limb = '\0';
1073 diff = general_numcompare (texta, textb);
1074 *lima = savea, *limb = saveb;
1076 else
1077 diff = general_numcompare (texta, textb);
1079 if (diff)
1080 return key->reverse ? -diff : diff;
1081 continue;
1083 else if (key->month)
1085 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1086 if (diff)
1087 return key->reverse ? -diff : diff;
1088 continue;
1090 else if (ignore && translate)
1092 #define CMP_WITH_IGNORE(A, B) \
1093 do \
1095 while (texta < lima && textb < limb) \
1097 while (texta < lima && ignore[UCHAR (*texta)]) \
1098 ++texta; \
1099 while (textb < limb && ignore[UCHAR (*textb)]) \
1100 ++textb; \
1101 if (texta < lima && textb < limb) \
1103 if ((A) != (B)) \
1105 diff = (A) - (B); \
1106 break; \
1108 ++texta; \
1109 ++textb; \
1112 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1113 diff = -1; \
1114 else if (texta < lima && textb == limb \
1115 && !ignore[UCHAR (*texta)]) \
1116 diff = 1; \
1119 if (diff == 0) \
1121 while (texta < lima && ignore[UCHAR (*texta)]) \
1122 ++texta; \
1123 while (textb < limb && ignore[UCHAR (*textb)]) \
1124 ++textb; \
1126 if (texta == lima && textb < limb) \
1127 diff = -1; \
1128 else if (texta < lima && textb == limb) \
1129 diff = 1; \
1131 /* Relative lengths are meaningless if characters were ignored. \
1132 Handling this case here avoids what might be an invalid length \
1133 comparison below. */ \
1134 if (diff == 0 && texta == lima && textb == limb) \
1135 return 0; \
1137 while (0)
1139 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1140 else if (ignore)
1141 CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1142 else if (translate)
1143 while (texta < lima && textb < limb)
1145 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1147 diff = (translate[UCHAR (*--texta)]
1148 - translate[UCHAR (*--textb)]);
1149 break;
1152 else
1153 diff = memcmp (texta, textb, min (lena, lenb));
1155 if (diff)
1156 return key->reverse ? -diff : diff;
1157 if ((diff = lena - lenb) != 0)
1158 return key->reverse ? -diff : diff;
1161 return 0;
1164 /* Compare two lines A and B, returning negative, zero, or positive
1165 depending on whether A compares less than, equal to, or greater than B. */
1167 static int
1168 compare (register const struct line *a, register const struct line *b)
1170 int diff, tmpa, tmpb, mini;
1172 /* First try to compare on the specified keys (if any).
1173 The only two cases with no key at all are unadorned sort,
1174 and unadorned sort -r. */
1175 if (keyhead.next)
1177 diff = keycompare (a, b);
1178 if (diff != 0)
1179 return diff;
1180 if (unique || stable)
1181 return 0;
1184 /* If the keys all compare equal (or no keys were specified)
1185 fall through to the default byte-by-byte comparison. */
1186 tmpa = a->length, tmpb = b->length;
1187 mini = min (tmpa, tmpb);
1188 if (mini == 0)
1189 diff = tmpa - tmpb;
1190 else
1192 char *ap = a->text, *bp = b->text;
1194 diff = UCHAR (*ap) - UCHAR (*bp);
1195 if (diff == 0)
1197 diff = memcmp (ap, bp, mini);
1198 if (diff == 0)
1199 diff = tmpa - tmpb;
1203 return reverse ? -diff : diff;
1206 /* Check that the lines read from the given FP come in order. Return
1207 1 if they do and 0 if there is a disorder.
1208 FIXME: return number of first out-of-order line if not sorted. */
1210 static int
1211 checkfp (FILE *fp)
1213 struct buffer buf; /* Input buffer. */
1214 struct lines lines; /* Lines scanned from the buffer. */
1215 struct line temp; /* Copy of previous line. */
1216 int cc; /* Character count. */
1217 int alloc, sorted = 1;
1219 initbuf (&buf, mergealloc);
1220 initlines (&lines, mergealloc / linelength + 1,
1221 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1222 alloc = linelength;
1223 temp.text = xmalloc (alloc);
1225 cc = fillbuf (&buf, fp);
1226 if (cc == 0)
1227 goto finish;
1229 findlines (&buf, &lines);
1231 while (1)
1233 struct line *prev_line; /* Pointer to previous line. */
1234 int cmp; /* Result of calling compare. */
1235 int i;
1237 /* Compare each line in the buffer with its successor. */
1238 for (i = 0; i < lines.used - 1; ++i)
1240 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1241 if ((unique && cmp >= 0) || (cmp > 0))
1243 sorted = 0;
1244 goto finish;
1248 /* Save the last line of the buffer and refill the buffer. */
1249 prev_line = lines.lines + (lines.used - 1);
1250 if (prev_line->length + 1 > alloc)
1254 alloc *= 2;
1256 while (alloc < prev_line->length + 1);
1257 temp.text = xrealloc (temp.text, alloc);
1259 assert (prev_line->length + 1 <= alloc);
1260 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1261 temp.length = prev_line->length;
1262 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1263 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1265 cc = fillbuf (&buf, fp);
1266 if (cc == 0)
1267 break;
1269 findlines (&buf, &lines);
1270 /* Make sure the line saved from the old buffer contents is
1271 less than or equal to the first line of the new buffer. */
1272 cmp = compare (&temp, &lines.lines[0]);
1273 if ((unique && cmp >= 0) || (cmp > 0))
1275 sorted = 0;
1276 break;
1280 finish:
1281 xfclose (fp);
1282 free (buf.buf);
1283 free ((char *) lines.lines);
1284 free (temp.text);
1285 return sorted;
1288 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1289 Close FPS before returning. */
1291 static void
1292 mergefps (FILE **fps, register int nfps, FILE *ofp)
1294 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1295 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1296 struct line saved; /* Saved line for unique check. */
1297 int savedflag = 0; /* True if there is a saved line. */
1298 int savealloc; /* Size allocated for the saved line. */
1299 int cur[NMERGE]; /* Current line in each line table. */
1300 int ord[NMERGE]; /* Table representing a permutation of fps,
1301 such that lines[ord[0]].lines[cur[ord[0]]]
1302 is the smallest line and will be next
1303 output. */
1304 register int i, j, t;
1306 #ifdef lint /* Suppress `used before initialized' warning. */
1307 savealloc = 0;
1308 #endif
1310 /* Allocate space for a saved line if necessary. */
1311 if (unique)
1313 savealloc = linelength;
1314 saved.text = xmalloc (savealloc);
1317 /* Read initial lines from each input file. */
1318 for (i = 0; i < nfps; ++i)
1320 initbuf (&buffer[i], mergealloc);
1321 /* If a file is empty, eliminate it from future consideration. */
1322 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1324 xfclose (fps[i]);
1325 --nfps;
1326 for (j = i; j < nfps; ++j)
1327 fps[j] = fps[j + 1];
1329 if (i == nfps)
1330 free (buffer[i].buf);
1331 else
1333 initlines (&lines[i], mergealloc / linelength + 1,
1334 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1335 findlines (&buffer[i], &lines[i]);
1336 cur[i] = 0;
1340 /* Set up the ord table according to comparisons among input lines.
1341 Since this only reorders two items if one is strictly greater than
1342 the other, it is stable. */
1343 for (i = 0; i < nfps; ++i)
1344 ord[i] = i;
1345 for (i = 1; i < nfps; ++i)
1346 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1347 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1348 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1350 /* Repeatedly output the smallest line until no input remains. */
1351 while (nfps)
1353 /* If uniqified output is turned on, output only the first of
1354 an identical series of lines. */
1355 if (unique)
1357 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1359 write_bytes (saved.text, saved.length, ofp);
1360 putc (eolchar, ofp);
1361 savedflag = 0;
1363 if (!savedflag)
1365 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1367 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1368 savealloc *= 2;
1369 saved.text = xrealloc (saved.text, savealloc);
1371 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1372 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1373 saved.length + 1);
1374 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1376 saved.keybeg = saved.text +
1377 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1378 - lines[ord[0]].lines[cur[ord[0]]].text);
1380 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1382 saved.keylim = saved.text +
1383 (lines[ord[0]].lines[cur[ord[0]]].keylim
1384 - lines[ord[0]].lines[cur[ord[0]]].text);
1386 savedflag = 1;
1389 else
1391 write_bytes (lines[ord[0]].lines[cur[ord[0]]].text,
1392 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1393 putc (eolchar, ofp);
1396 /* Check if we need to read more lines into core. */
1397 if (++cur[ord[0]] == lines[ord[0]].used)
1398 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1400 findlines (&buffer[ord[0]], &lines[ord[0]]);
1401 cur[ord[0]] = 0;
1403 else
1405 /* We reached EOF on fps[ord[0]]. */
1406 for (i = 1; i < nfps; ++i)
1407 if (ord[i] > ord[0])
1408 --ord[i];
1409 --nfps;
1410 xfclose (fps[ord[0]]);
1411 free (buffer[ord[0]].buf);
1412 free ((char *) lines[ord[0]].lines);
1413 for (i = ord[0]; i < nfps; ++i)
1415 fps[i] = fps[i + 1];
1416 buffer[i] = buffer[i + 1];
1417 lines[i] = lines[i + 1];
1418 cur[i] = cur[i + 1];
1420 for (i = 0; i < nfps; ++i)
1421 ord[i] = ord[i + 1];
1422 continue;
1425 /* The new line just read in may be larger than other lines
1426 already in core; push it back in the queue until we encounter
1427 a line larger than it. */
1428 for (i = 1; i < nfps; ++i)
1430 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1431 &lines[ord[i]].lines[cur[ord[i]]]);
1432 if (!t)
1433 t = ord[0] - ord[i];
1434 if (t < 0)
1435 break;
1437 t = ord[0];
1438 for (j = 1; j < i; ++j)
1439 ord[j - 1] = ord[j];
1440 ord[i - 1] = t;
1443 if (unique && savedflag)
1445 write_bytes (saved.text, saved.length, ofp);
1446 putc (eolchar, ofp);
1447 free (saved.text);
1451 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1453 static void
1454 sortlines (struct line *lines, int nlines, struct line *temp)
1456 register struct line *lo, *hi, *t;
1457 register int nlo, nhi;
1459 if (nlines == 2)
1461 if (compare (&lines[0], &lines[1]) > 0)
1462 *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1463 return;
1466 nlo = nlines / 2;
1467 lo = lines;
1468 nhi = nlines - nlo;
1469 hi = lines + nlo;
1471 if (nlo > 1)
1472 sortlines (lo, nlo, temp);
1474 if (nhi > 1)
1475 sortlines (hi, nhi, temp);
1477 t = temp;
1479 while (nlo && nhi)
1480 if (compare (lo, hi) <= 0)
1481 *t++ = *lo++, --nlo;
1482 else
1483 *t++ = *hi++, --nhi;
1484 while (nlo--)
1485 *t++ = *lo++;
1487 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1488 *lo++ = *t++;
1491 /* Check that each of the NFILES FILES is ordered.
1492 Return a count of disordered files. */
1494 static int
1495 check (char **files, int nfiles)
1497 int i, disorders = 0;
1498 FILE *fp;
1500 for (i = 0; i < nfiles; ++i)
1502 fp = xfopen (files[i], "r");
1503 if (!checkfp (fp))
1505 fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]);
1506 ++disorders;
1509 return disorders;
1512 /* Merge NFILES FILES onto OFP. */
1514 static void
1515 merge (char **files, int nfiles, FILE *ofp)
1517 int i, j, t;
1518 char *temp;
1519 FILE *fps[NMERGE], *tfp;
1521 while (nfiles > NMERGE)
1523 t = 0;
1524 for (i = 0; i < nfiles / NMERGE; ++i)
1526 for (j = 0; j < NMERGE; ++j)
1527 fps[j] = xfopen (files[i * NMERGE + j], "r");
1528 tfp = xtmpfopen (temp = tempname ());
1529 mergefps (fps, NMERGE, tfp);
1530 xfclose (tfp);
1531 for (j = 0; j < NMERGE; ++j)
1532 zaptemp (files[i * NMERGE + j]);
1533 files[t++] = temp;
1535 for (j = 0; j < nfiles % NMERGE; ++j)
1536 fps[j] = xfopen (files[i * NMERGE + j], "r");
1537 tfp = xtmpfopen (temp = tempname ());
1538 mergefps (fps, nfiles % NMERGE, tfp);
1539 xfclose (tfp);
1540 for (j = 0; j < nfiles % NMERGE; ++j)
1541 zaptemp (files[i * NMERGE + j]);
1542 files[t++] = temp;
1543 nfiles = t;
1546 for (i = 0; i < nfiles; ++i)
1547 fps[i] = xfopen (files[i], "r");
1548 mergefps (fps, i, ofp);
1549 for (i = 0; i < nfiles; ++i)
1550 zaptemp (files[i]);
1553 /* Sort NFILES FILES onto OFP. */
1555 static void
1556 sort (char **files, int nfiles, FILE *ofp)
1558 struct buffer buf;
1559 struct lines lines;
1560 struct line *tmp;
1561 int i, ntmp;
1562 FILE *fp, *tfp;
1563 struct tempnode *node;
1564 int n_temp_files = 0;
1565 char **tempfiles;
1567 initbuf (&buf, sortalloc);
1568 initlines (&lines, sortalloc / linelength + 1,
1569 LINEALLOC / sizeof (struct line));
1570 ntmp = lines.alloc;
1571 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1573 while (nfiles--)
1575 fp = xfopen (*files++, "r");
1576 while (fillbuf (&buf, fp))
1578 findlines (&buf, &lines);
1579 if (lines.used > ntmp)
1581 while (lines.used > ntmp)
1582 ntmp *= 2;
1583 tmp = (struct line *)
1584 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1586 sortlines (lines.lines, lines.used, tmp);
1587 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1588 tfp = ofp;
1589 else
1591 ++n_temp_files;
1592 tfp = xtmpfopen (tempname ());
1594 for (i = 0; i < lines.used; ++i)
1595 if (!unique || i == 0
1596 || compare (&lines.lines[i], &lines.lines[i - 1]))
1598 write_bytes (lines.lines[i].text, lines.lines[i].length, tfp);
1599 putc (eolchar, tfp);
1601 if (tfp != ofp)
1602 xfclose (tfp);
1604 xfclose (fp);
1607 free (buf.buf);
1608 free ((char *) lines.lines);
1609 free ((char *) tmp);
1611 if (n_temp_files)
1613 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1614 i = n_temp_files;
1615 for (node = temphead.next; i > 0; node = node->next)
1616 tempfiles[--i] = node->name;
1617 merge (tempfiles, n_temp_files, ofp);
1618 free ((char *) tempfiles);
1622 /* Insert key KEY at the end of the list (`keyhead'). */
1624 static void
1625 insertkey (struct keyfield *key)
1627 struct keyfield *k = &keyhead;
1629 while (k->next)
1630 k = k->next;
1631 k->next = key;
1632 key->next = NULL;
1635 static void
1636 badfieldspec (const char *s)
1638 error (SORT_FAILURE, 0, _("invalid field specification `%s'"), s);
1641 /* Handle interrupts and hangups. */
1643 static void
1644 sighandler (int sig)
1646 #ifdef SA_INTERRUPT
1647 struct sigaction sigact;
1649 sigact.sa_handler = SIG_DFL;
1650 sigemptyset (&sigact.sa_mask);
1651 sigact.sa_flags = 0;
1652 sigaction (sig, &sigact, NULL);
1653 #else /* !SA_INTERRUPT */
1654 signal (sig, SIG_DFL);
1655 #endif /* SA_INTERRUPT */
1656 cleanup ();
1657 kill (getpid (), sig);
1660 /* Set the ordering options for KEY specified in S.
1661 Return the address of the first character in S that
1662 is not a valid ordering option.
1663 BLANKTYPE is the kind of blanks that 'b' should skip. */
1665 static char *
1666 set_ordering (register const char *s, struct keyfield *key,
1667 enum blanktype blanktype)
1669 while (*s)
1671 switch (*s)
1673 case 'b':
1674 if (blanktype == bl_start || blanktype == bl_both)
1675 key->skipsblanks = 1;
1676 if (blanktype == bl_end || blanktype == bl_both)
1677 key->skipeblanks = 1;
1678 break;
1679 case 'd':
1680 key->ignore = nondictionary;
1681 break;
1682 case 'f':
1683 key->translate = fold_toupper;
1684 break;
1685 case 'g':
1686 key->general_numeric = 1;
1687 break;
1688 case 'i':
1689 key->ignore = nonprinting;
1690 break;
1691 case 'M':
1692 key->month = 1;
1693 break;
1694 case 'n':
1695 key->numeric = 1;
1696 break;
1697 case 'r':
1698 key->reverse = 1;
1699 break;
1700 default:
1701 return (char *) s;
1703 ++s;
1705 return (char *) s;
1708 static void
1709 key_init (struct keyfield *key)
1711 memset (key, 0, sizeof (*key));
1712 key->eword = -1;
1716 main (int argc, char **argv)
1718 struct keyfield *key = NULL, gkey;
1719 char *s;
1720 int i, t, t2;
1721 int checkonly = 0, mergeonly = 0, nfiles = 0;
1722 char *minus = "-", *outfile = minus, **files, *tmp;
1723 FILE *ofp;
1724 #ifdef SA_INTERRUPT
1725 struct sigaction oldact, newact;
1726 #endif /* SA_INTERRUPT */
1728 program_name = argv[0];
1729 setlocale (LC_ALL, "");
1730 bindtextdomain (PACKAGE, LOCALEDIR);
1731 textdomain (PACKAGE);
1733 parse_long_options (argc, argv, "sort", GNU_PACKAGE, VERSION, usage);
1735 have_read_stdin = 0;
1736 inittables ();
1738 temp_file_prefix = getenv ("TMPDIR");
1739 if (temp_file_prefix == NULL)
1740 temp_file_prefix = DEFAULT_TMPDIR;
1742 #ifdef SA_INTERRUPT
1743 newact.sa_handler = sighandler;
1744 sigemptyset (&newact.sa_mask);
1745 newact.sa_flags = 0;
1747 sigaction (SIGINT, NULL, &oldact);
1748 if (oldact.sa_handler != SIG_IGN)
1749 sigaction (SIGINT, &newact, NULL);
1750 sigaction (SIGHUP, NULL, &oldact);
1751 if (oldact.sa_handler != SIG_IGN)
1752 sigaction (SIGHUP, &newact, NULL);
1753 sigaction (SIGPIPE, NULL, &oldact);
1754 if (oldact.sa_handler != SIG_IGN)
1755 sigaction (SIGPIPE, &newact, NULL);
1756 sigaction (SIGTERM, NULL, &oldact);
1757 if (oldact.sa_handler != SIG_IGN)
1758 sigaction (SIGTERM, &newact, NULL);
1759 #else /* !SA_INTERRUPT */
1760 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1761 signal (SIGINT, sighandler);
1762 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1763 signal (SIGHUP, sighandler);
1764 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1765 signal (SIGPIPE, sighandler);
1766 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1767 signal (SIGTERM, sighandler);
1768 #endif /* !SA_INTERRUPT */
1770 gkey.sword = gkey.eword = -1;
1771 gkey.ignore = NULL;
1772 gkey.translate = NULL;
1773 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1774 gkey.skipsblanks = gkey.skipeblanks = 0;
1776 files = (char **) xmalloc (sizeof (char *) * argc);
1778 for (i = 1; i < argc; ++i)
1780 if (argv[i][0] == '+')
1782 if (key)
1783 insertkey (key);
1784 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1785 key_init (key);
1786 s = argv[i] + 1;
1787 if (! (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1]))))
1788 badfieldspec (argv[i]);
1789 for (t = 0; ISDIGIT (*s); ++s)
1790 t = 10 * t + *s - '0';
1791 t2 = 0;
1792 if (*s == '.')
1793 for (++s; ISDIGIT (*s); ++s)
1794 t2 = 10 * t2 + *s - '0';
1795 if (t2 || t)
1797 key->sword = t;
1798 key->schar = t2;
1800 else
1801 key->sword = -1;
1802 s = set_ordering (s, key, bl_start);
1803 if (*s)
1804 badfieldspec (argv[i]);
1806 else if (argv[i][0] == '-' && argv[i][1])
1808 s = argv[i] + 1;
1809 if (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1])))
1811 if (!key)
1813 /* Provoke with `sort -9'. */
1814 error (0, 0, _("when using the old-style +POS and -POS \
1815 key specifiers,\nthe +POS specifier must come first"));
1816 usage (SORT_FAILURE);
1818 for (t = 0; ISDIGIT (*s); ++s)
1819 t = t * 10 + *s - '0';
1820 t2 = 0;
1821 if (*s == '.')
1822 for (++s; ISDIGIT (*s); ++s)
1823 t2 = t2 * 10 + *s - '0';
1824 key->eword = t;
1825 key->echar = t2;
1826 s = set_ordering (s, key, bl_end);
1827 if (*s)
1828 badfieldspec (argv[i]);
1829 insertkey (key);
1830 key = NULL;
1832 else
1833 while (*s)
1835 s = set_ordering (s, &gkey, bl_both);
1836 switch (*s)
1838 case '\0':
1839 break;
1840 case 'c':
1841 checkonly = 1;
1842 break;
1843 case 'k':
1844 if (s[1])
1845 ++s;
1846 else
1848 if (i == argc - 1)
1849 error (SORT_FAILURE, 0,
1850 _("option `-k' requires an argument"));
1851 else
1852 s = argv[++i];
1854 if (key)
1855 insertkey (key);
1856 key = (struct keyfield *)
1857 xmalloc (sizeof (struct keyfield));
1858 key_init (key);
1859 /* Get POS1. */
1860 if (!ISDIGIT (*s))
1861 badfieldspec (argv[i]);
1862 for (t = 0; ISDIGIT (*s); ++s)
1863 t = 10 * t + *s - '0';
1864 if (t == 0)
1866 /* Provoke with `sort -k0' */
1867 error (0, 0, _("the starting field number argument \
1868 to the `-k' option must be positive"));
1869 badfieldspec (argv[i]);
1871 --t;
1872 t2 = 0;
1873 if (*s == '.')
1875 if (!ISDIGIT (s[1]))
1877 /* Provoke with `sort -k1.' */
1878 error (0, 0, _("starting field spec has `.' but \
1879 lacks following character offset"));
1880 badfieldspec (argv[i]);
1882 for (++s; ISDIGIT (*s); ++s)
1883 t2 = 10 * t2 + *s - '0';
1884 if (t2 == 0)
1886 /* Provoke with `sort -k1.0' */
1887 error (0, 0, _("starting field character offset \
1888 argument to the `-k' option\nmust be positive"));
1889 badfieldspec (argv[i]);
1891 --t2;
1893 if (t2 || t)
1895 key->sword = t;
1896 key->schar = t2;
1898 else
1899 key->sword = -1;
1900 s = set_ordering (s, key, bl_start);
1901 if (*s == 0)
1903 key->eword = -1;
1904 key->echar = 0;
1906 else if (*s != ',')
1907 badfieldspec (argv[i]);
1908 else if (*s == ',')
1910 /* Skip over comma. */
1911 ++s;
1912 if (*s == 0)
1914 /* Provoke with `sort -k1,' */
1915 error (0, 0, _("field specification has `,' but \
1916 lacks following field spec"));
1917 badfieldspec (argv[i]);
1919 /* Get POS2. */
1920 for (t = 0; ISDIGIT (*s); ++s)
1921 t = t * 10 + *s - '0';
1922 if (t == 0)
1924 /* Provoke with `sort -k1,0' */
1925 error (0, 0, _("ending field number argument \
1926 to the `-k' option must be positive"));
1927 badfieldspec (argv[i]);
1929 --t;
1930 t2 = 0;
1931 if (*s == '.')
1933 if (!ISDIGIT (s[1]))
1935 /* Provoke with `sort -k1,1.' */
1936 error (0, 0, _("ending field spec has `.' \
1937 but lacks following character offset"));
1938 badfieldspec (argv[i]);
1940 for (++s; ISDIGIT (*s); ++s)
1941 t2 = t2 * 10 + *s - '0';
1943 else
1945 /* `-k 2,3' is equivalent to `+1 -3'. */
1946 ++t;
1948 key->eword = t;
1949 key->echar = t2;
1950 s = set_ordering (s, key, bl_end);
1951 if (*s)
1952 badfieldspec (argv[i]);
1954 insertkey (key);
1955 key = NULL;
1956 goto outer;
1957 case 'm':
1958 mergeonly = 1;
1959 break;
1960 case 'o':
1961 if (s[1])
1962 outfile = s + 1;
1963 else
1965 if (i == argc - 1)
1966 error (SORT_FAILURE, 0,
1967 _("option `-o' requires an argument"));
1968 else
1969 outfile = argv[++i];
1971 goto outer;
1972 case 's':
1973 stable = 1;
1974 break;
1975 case 't':
1976 if (s[1])
1977 tab = *++s;
1978 else if (i < argc - 1)
1980 tab = *argv[++i];
1981 goto outer;
1983 else
1984 error (SORT_FAILURE, 0,
1985 _("option `-t' requires an argument"));
1986 break;
1987 case 'T':
1988 if (s[1])
1989 temp_file_prefix = ++s;
1990 else
1992 if (i < argc - 1)
1993 temp_file_prefix = argv[++i];
1994 else
1995 error (SORT_FAILURE, 0,
1996 _("option `-T' requires an argument"));
1998 goto outer;
1999 /* break; */
2000 case 'u':
2001 unique = 1;
2002 break;
2003 case 'z':
2004 eolchar = 0;
2005 break;
2006 case 'y':
2007 /* Accept and ignore e.g. -y0 for compatibility with
2008 Solaris 2. */
2009 goto outer;
2010 default:
2011 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2012 argv[0], *s);
2013 usage (SORT_FAILURE);
2015 if (*s)
2016 ++s;
2019 else /* Not an option. */
2021 files[nfiles++] = argv[i];
2023 outer:;
2026 if (key)
2027 insertkey (key);
2029 /* Inheritance of global options to individual keys. */
2030 for (key = keyhead.next; key; key = key->next)
2031 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2032 && !key->skipeblanks && !key->month && !key->numeric
2033 && !key->general_numeric)
2035 key->ignore = gkey.ignore;
2036 key->translate = gkey.translate;
2037 key->skipsblanks = gkey.skipsblanks;
2038 key->skipeblanks = gkey.skipeblanks;
2039 key->month = gkey.month;
2040 key->numeric = gkey.numeric;
2041 key->general_numeric = gkey.general_numeric;
2042 key->reverse = gkey.reverse;
2045 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2046 || gkey.skipeblanks || gkey.month || gkey.numeric
2047 || gkey.general_numeric))
2048 insertkey (&gkey);
2049 reverse = gkey.reverse;
2051 if (nfiles == 0)
2053 nfiles = 1;
2054 files = &minus;
2057 if (checkonly)
2059 /* POSIX requires that sort return 1 IFF invoked with -c and the
2060 input is not properly sorted. */
2061 exit (check (files, nfiles) == 0 ? 0 : 1);
2064 if (strcmp (outfile, "-"))
2066 struct stat outstat;
2067 if (stat (outfile, &outstat) == 0)
2069 /* The following code prevents a race condition when
2070 people use the brain dead shell programming idiom:
2071 cat file | sort -o file
2072 This feature is provided for historical compatibility,
2073 but we strongly discourage ever relying on this in
2074 new shell programs. */
2076 /* Temporarily copy each input file that might be another name
2077 for the output file. When in doubt (e.g. a pipe), copy. */
2078 for (i = 0; i < nfiles; ++i)
2080 char buf[8192];
2081 FILE *fp;
2082 int cc;
2084 if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
2086 struct stat instat;
2087 if ((strcmp (files[i], "-")
2088 ? stat (files[i], &instat)
2089 : fstat (STDIN_FILENO, &instat)) != 0)
2091 error (0, errno, "%s", files[i]);
2092 cleanup ();
2093 exit (SORT_FAILURE);
2095 if (S_ISREG (instat.st_mode)
2096 && (instat.st_ino != outstat.st_ino
2097 || instat.st_dev != outstat.st_dev))
2099 /* We know the files are distinct. */
2100 continue;
2104 fp = xfopen (files[i], "r");
2105 tmp = tempname ();
2106 ofp = xtmpfopen (tmp);
2107 while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
2108 write_bytes (buf, cc, ofp);
2109 if (ferror (fp))
2111 error (0, errno, "%s", files[i]);
2112 cleanup ();
2113 exit (SORT_FAILURE);
2115 xfclose (ofp);
2116 xfclose (fp);
2117 files[i] = tmp;
2120 ofp = xfopen (outfile, "w");
2122 else
2123 ofp = stdout;
2125 if (mergeonly)
2126 merge (files, nfiles, ofp);
2127 else
2128 sort (files, nfiles, ofp);
2129 cleanup ();
2131 /* If we wait for the implicit flush on exit, and the parent process
2132 has closed stdout (e.g., exec >&- in a shell), then the output file
2133 winds up empty. I don't understand why. This is under SunOS,
2134 Solaris, Ultrix, and Irix. This premature fflush makes the output
2135 reappear. --karl@cs.umb.edu */
2136 if (fflush (ofp) < 0)
2137 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2139 if (have_read_stdin && fclose (stdin) == EOF)
2140 error (SORT_FAILURE, errno, outfile);
2141 if (ferror (stdout) || fclose (stdout) == EOF)
2142 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2144 exit (EXIT_SUCCESS);