.
[coreutils.git] / src / sort.c
blob733df0adfaf5a774d2d3300e89d8686126d4adc2
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 91, 92, 93, 94, 95, 1996 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
22 #include <config.h>
24 /* Get isblank from GNU libc. */
25 #define _GNU_SOURCE
27 #include <sys/types.h>
28 #include <signal.h>
29 #include <stdio.h>
30 #define NDEBUG 1
31 #include <assert.h>
32 #include "system.h"
33 #include "long-options.h"
34 #include "error.h"
35 #include "xstrtod.h"
37 #ifdef HAVE_LIMITS_H
38 #include <limits.h>
39 #else
40 #ifndef UCHAR_MAX
41 #define UCHAR_MAX 255
42 #endif
43 #endif
44 #ifndef STDC_HEADERS
45 char *malloc ();
46 char *realloc ();
47 void free ();
48 #endif
50 /* Undefine, to avoid warning about redefinition on some systems. */
51 #undef min
52 #define min(a, b) ((a) < (b) ? (a) : (b))
54 #define UCHAR_LIM (UCHAR_MAX + 1)
55 #define UCHAR(c) ((unsigned char) (c))
57 #ifndef DEFAULT_TMPDIR
58 #define DEFAULT_TMPDIR "/tmp"
59 #endif
61 /* Use this as exit status in case of error, not EXIT_FAILURE. This
62 is necessary because EXIT_FAILURE is usually 1 and POSIX requires
63 that sort exit with status 1 IFF invoked with -c and the input is
64 not properly sorted. Any other irregular exit must exit with a
65 status code greater than 1. */
66 #define SORT_FAILURE 2
68 /* The kind of blanks for '-b' to skip in various options. */
69 enum blanktype { bl_start, bl_end, bl_both };
71 /* The character marking end of line. Default to \n. */
72 int eolchar = '\n';
74 /* Lines are held in core as counted strings. */
75 struct line
77 char *text; /* Text of the line. */
78 int length; /* Length not including final newline. */
79 char *keybeg; /* Start of first key. */
80 char *keylim; /* Limit of first key. */
83 /* Arrays of lines. */
84 struct lines
86 struct line *lines; /* Dynamically allocated array of lines. */
87 int used; /* Number of slots used. */
88 int alloc; /* Number of slots allocated. */
89 int limit; /* Max number of slots to allocate. */
92 /* Input buffers. */
93 struct buffer
95 char *buf; /* Dynamically allocated buffer. */
96 int used; /* Number of bytes used. */
97 int alloc; /* Number of bytes allocated. */
98 int left; /* Number of bytes left after line parsing. */
101 struct keyfield
103 int sword; /* Zero-origin 'word' to start at. */
104 int schar; /* Additional characters to skip. */
105 int skipsblanks; /* Skip leading white space at start. */
106 int eword; /* Zero-origin first word after field. */
107 int echar; /* Additional characters in field. */
108 int skipeblanks; /* Skip trailing white space at finish. */
109 int *ignore; /* Boolean array of characters to ignore. */
110 char *translate; /* Translation applied to characters. */
111 int numeric; /* Flag for numeric comparison. Handle
112 strings of digits with optional decimal
113 point, but no exponential notation. */
114 int general_numeric; /* Flag for general, numeric comparison.
115 Handle numbers in exponential notation. */
116 int month; /* Flag for comparison by month name. */
117 int reverse; /* Reverse the sense of comparison. */
118 struct keyfield *next; /* Next keyfield to try. */
121 struct month
123 char *name;
124 int val;
127 /* The name this program was run with. */
128 char *program_name;
130 /* Table of digits. */
131 static int digits[UCHAR_LIM];
133 /* Table of white space. */
134 static int blanks[UCHAR_LIM];
136 /* Table of non-printing characters. */
137 static int nonprinting[UCHAR_LIM];
139 /* Table of non-dictionary characters (not letters, digits, or blanks). */
140 static int nondictionary[UCHAR_LIM];
142 /* Translation table folding lower case to upper. */
143 static char fold_toupper[UCHAR_LIM];
145 /* Table mapping 3-letter month names to integers.
146 Alphabetic order allows binary search. */
147 static struct month const monthtab[] =
149 {"APR", 4},
150 {"AUG", 8},
151 {"DEC", 12},
152 {"FEB", 2},
153 {"JAN", 1},
154 {"JUL", 7},
155 {"JUN", 6},
156 {"MAR", 3},
157 {"MAY", 5},
158 {"NOV", 11},
159 {"OCT", 10},
160 {"SEP", 9}
163 /* During the merge phase, the number of files to merge at once. */
164 #define NMERGE 16
166 /* Initial buffer size for in core sorting. Will not grow unless a
167 line longer than this is seen. */
168 static int sortalloc = 512 * 1024;
170 /* Initial buffer size for in core merge buffers. Bear in mind that
171 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
172 static int mergealloc = 16 * 1024;
174 /* Guess of average line length. */
175 static int linelength = 30;
177 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
178 #define LINEALLOC (256 * 1024)
180 /* Prefix for temporary file names. */
181 static char *temp_file_prefix;
183 /* Flag to reverse the order of all comparisons. */
184 static int reverse;
186 /* Flag for stable sort. This turns off the last ditch bytewise
187 comparison of lines, and instead leaves lines in the same order
188 they were read if all keys compare equal. */
189 static int stable;
191 /* Tab character separating fields. If NUL, then fields are separated
192 by the empty string between a non-whitespace character and a whitespace
193 character. */
194 static char tab;
196 /* Flag to remove consecutive duplicate lines from the output.
197 Only the last of a sequence of equal lines will be output. */
198 static int unique;
200 /* Nonzero if any of the input files are the standard input. */
201 static int have_read_stdin;
203 /* Lists of key field comparisons to be tried. */
204 static struct keyfield keyhead;
206 static void
207 usage (int status)
209 if (status != 0)
210 fprintf (stderr, _("Try `%s --help' for more information.\n"),
211 program_name);
212 else
214 printf (_("\
215 Usage: %s [OPTION]... [FILE]...\n\
217 program_name);
218 printf (_("\
219 Write sorted concatenation of all FILE(s) to standard output.\n\
221 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
222 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
223 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
224 -b ignore leading blanks in sort fields or keys\n\
225 -c check if given files already sorted, do not sort\n\
226 -d consider only [a-zA-Z0-9 ] characters in keys\n\
227 -f fold lower case to upper case characters in keys\n\
228 -g compare according to general numerical value, imply -b\n\
229 -i consider only [\\040-\\0176] characters in keys\n\
230 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
231 -m merge already sorted files, do not sort\n\
232 -n compare according to string numerical value, imply -b\n\
233 -o FILE write result on FILE instead of standard output\n\
234 -r reverse the result of comparisons\n\
235 -s stabilize sort by disabling last resort comparison\n\
236 -t SEP use SEParator instead of non- to whitespace transition\n\
237 -u with -c, check for strict ordering\n\
238 -u with -m, only output the first of an equal sequence\n\
239 -z end lines with 0 byte, not newline, for find -print0\n\
240 --help display this help and exit\n\
241 --version output version information and exit\n\
243 POS is F[.C][OPTS], where F is the field number and C the character\n\
244 position in the field, both counted from zero. OPTS is made up of one\n\
245 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
246 for that key. If no key given, use the entire line as key. With no\n\
247 FILE, or when FILE is -, read standard input.\n\
249 , DEFAULT_TMPDIR);
251 /* Don't use EXIT_FAILURE here in case it is defined to be 1.
252 POSIX requires that sort return 1 IFF invoked with -c and
253 the input is not properly sorted. */
254 assert (status == 0 || status == SORT_FAILURE);
255 exit (status);
258 /* The list of temporary files. */
259 static struct tempnode
261 char *name;
262 struct tempnode *next;
263 } temphead;
265 /* Clean up any remaining temporary files. */
267 static void
268 cleanup (void)
270 struct tempnode *node;
272 for (node = temphead.next; node; node = node->next)
273 unlink (node->name);
276 /* Allocate N bytes of memory dynamically, with error checking. */
278 static char *
279 xmalloc (unsigned int n)
281 char *p;
283 p = malloc (n);
284 if (p == 0)
286 error (0, 0, _("virtual memory exhausted"));
287 cleanup ();
288 exit (SORT_FAILURE);
290 return p;
293 /* Change the size of an allocated block of memory P to N bytes,
294 with error checking.
295 If P is NULL, run xmalloc.
296 If N is 0, run free and return NULL. */
298 static char *
299 xrealloc (char *p, unsigned int n)
301 if (p == 0)
302 return xmalloc (n);
303 if (n == 0)
305 free (p);
306 return 0;
308 p = realloc (p, n);
309 if (p == 0)
311 error (0, 0, _("virtual memory exhausted"));
312 cleanup ();
313 exit (SORT_FAILURE);
315 return p;
318 static FILE *
319 xtmpfopen (const char *file)
321 FILE *fp;
322 int fd;
324 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
325 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
327 error (0, errno, "%s", file);
328 cleanup ();
329 exit (SORT_FAILURE);
332 return fp;
335 static FILE *
336 xfopen (const char *file, const char *how)
338 FILE *fp;
340 if (strcmp (file, "-") == 0)
342 fp = stdin;
344 else
346 if ((fp = fopen (file, how)) == NULL)
348 error (0, errno, "%s", file);
349 cleanup ();
350 exit (SORT_FAILURE);
354 if (fp == stdin)
355 have_read_stdin = 1;
356 return fp;
359 static void
360 xfclose (FILE *fp)
362 if (fp == stdin)
364 /* Allow reading stdin from tty more than once. */
365 if (feof (fp))
366 clearerr (fp);
368 else if (fp == stdout)
370 if (fflush (fp) != 0)
372 error (0, errno, _("flushing file"));
373 cleanup ();
374 exit (SORT_FAILURE);
377 else
379 if (fclose (fp) != 0)
381 error (0, errno, _("error closing file"));
382 cleanup ();
383 exit (SORT_FAILURE);
388 static void
389 write_bytes (const char *buf, size_t n_bytes, FILE *fp)
391 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
393 error (0, errno, _("write error"));
394 cleanup ();
395 exit (SORT_FAILURE);
399 /* Return a name for a temporary file. */
401 static char *
402 tempname (void)
404 static unsigned int seq;
405 int len = strlen (temp_file_prefix);
406 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
407 struct tempnode *node;
409 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
410 sprintf (name,
411 "%s%ssort%5.5d%5.5d",
412 temp_file_prefix,
413 (len && temp_file_prefix[len - 1] != '/') ? "/" : "",
414 (unsigned int) getpid () & 0xffff, seq);
416 /* Make sure that SEQ's value fits in 5 digits. */
417 ++seq;
418 if (seq >= 100000)
419 seq = 0;
421 node->name = name;
422 node->next = temphead.next;
423 temphead.next = node;
424 return name;
427 /* Search through the list of temporary files for NAME;
428 remove it if it is found on the list. */
430 static void
431 zaptemp (char *name)
433 struct tempnode *node, *temp;
435 for (node = &temphead; node->next; node = node->next)
436 if (!strcmp (name, node->next->name))
437 break;
438 if (node->next)
440 temp = node->next;
441 unlink (temp->name);
442 free (temp->name);
443 node->next = temp->next;
444 free ((char *) temp);
448 /* Initialize the character class tables. */
450 static void
451 inittables (void)
453 int i;
455 for (i = 0; i < UCHAR_LIM; ++i)
457 if (ISBLANK (i))
458 blanks[i] = 1;
459 if (ISDIGIT (i))
460 digits[i] = 1;
461 if (!ISPRINT (i))
462 nonprinting[i] = 1;
463 if (!ISALNUM (i) && !ISBLANK (i))
464 nondictionary[i] = 1;
465 if (ISLOWER (i))
466 fold_toupper[i] = toupper (i);
467 else
468 fold_toupper[i] = i;
472 /* Initialize BUF, allocating ALLOC bytes initially. */
474 static void
475 initbuf (struct buffer *buf, int alloc)
477 buf->alloc = alloc;
478 buf->buf = xmalloc (buf->alloc);
479 buf->used = buf->left = 0;
482 /* Fill BUF reading from FP, moving buf->left bytes from the end
483 of buf->buf to the beginning first. If EOF is reached and the
484 file wasn't terminated by a newline, supply one. Return a count
485 of bytes buffered. */
487 static int
488 fillbuf (struct buffer *buf, FILE *fp)
490 int cc;
492 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
493 buf->used = buf->left;
495 while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, eolchar, buf->used)))
497 if (buf->used == buf->alloc)
499 buf->alloc *= 2;
500 buf->buf = xrealloc (buf->buf, buf->alloc);
502 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
503 if (ferror (fp))
505 error (0, errno, _("read error"));
506 cleanup ();
507 exit (SORT_FAILURE);
509 buf->used += cc;
512 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
514 if (buf->used == buf->alloc)
516 buf->alloc *= 2;
517 buf->buf = xrealloc (buf->buf, buf->alloc);
519 buf->buf[buf->used++] = eolchar;
522 return buf->used;
525 /* Initialize LINES, allocating space for ALLOC lines initially.
526 LIMIT is the maximum possible number of lines to allocate space
527 for, ever. */
529 static void
530 initlines (struct lines *lines, int alloc, int limit)
532 lines->alloc = alloc;
533 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
534 lines->used = 0;
535 lines->limit = limit;
538 /* Return a pointer to the first character of the field specified
539 by KEY in LINE. */
541 static char *
542 begfield (const struct line *line, const struct keyfield *key)
544 register char *ptr = line->text, *lim = ptr + line->length;
545 register int sword = key->sword, schar = key->schar;
547 if (tab)
548 while (ptr < lim && sword--)
550 while (ptr < lim && *ptr != tab)
551 ++ptr;
552 if (ptr < lim)
553 ++ptr;
555 else
556 while (ptr < lim && sword--)
558 while (ptr < lim && blanks[UCHAR (*ptr)])
559 ++ptr;
560 while (ptr < lim && !blanks[UCHAR (*ptr)])
561 ++ptr;
564 if (key->skipsblanks)
565 while (ptr < lim && blanks[UCHAR (*ptr)])
566 ++ptr;
568 if (ptr + schar <= lim)
569 ptr += schar;
570 else
571 ptr = lim;
573 return ptr;
576 /* Return the limit of (a pointer to the first character after) the field
577 in LINE specified by KEY. */
579 static char *
580 limfield (const struct line *line, const struct keyfield *key)
582 register char *ptr = line->text, *lim = ptr + line->length;
583 register int eword = key->eword, echar = key->echar;
585 /* Note: from the POSIX spec:
586 The leading field separator itself is included in
587 a field when -t is not used. FIXME: move this comment up... */
589 /* Move PTR past EWORD fields or to one past the last byte on LINE,
590 whichever comes first. If there are more than EWORD fields, leave
591 PTR pointing at the beginning of the field having zero-based index,
592 EWORD. If a delimiter character was specified (via -t), then that
593 `beginning' is the first character following the delimiting TAB.
594 Otherwise, leave PTR pointing at the first `blank' character after
595 the preceding field. */
596 if (tab)
597 while (ptr < lim && eword--)
599 while (ptr < lim && *ptr != tab)
600 ++ptr;
601 if (ptr < lim && (eword || echar > 0))
602 ++ptr;
604 else
605 while (ptr < lim && eword--)
607 while (ptr < lim && blanks[UCHAR (*ptr)])
608 ++ptr;
609 while (ptr < lim && !blanks[UCHAR (*ptr)])
610 ++ptr;
613 #ifdef POSIX_UNSPECIFIED
614 /* The following block of code makes GNU sort incompatible with
615 standard Unix sort, so it's ifdef'd out for now.
616 The POSIX spec isn't clear on how to interpret this.
617 FIXME: request clarification.
619 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
620 Date: Thu, 30 May 96 12:20:41 -0400
622 [...]I believe I've found another bug in `sort'.
624 $ cat /tmp/sort.in
625 a b c 2 d
626 pq rs 1 t
627 $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
628 a b c 2 d
629 pq rs 1 t
630 $ /bin/sort +0.6 -0.7 </tmp/sort.in
631 pq rs 1 t
632 a b c 2 d
634 Unix sort produced the answer I expected: sort on the single character
635 in column 6. GNU sort produced different results, because it disagrees
636 on the interpretation of the key-end spec "-M.N". Unix sort reads this
637 as "skip M fields, then N characters"; but GNU sort wants it to mean
638 "skip M fields, then either N characters or the rest of the current
639 field, whichever comes first". This extra clause applies only to
640 key-ends, not key-starts.
643 /* Make LIM point to the end of (one byte past) the current field. */
644 if (tab)
646 char *newlim;
647 newlim = memchr (ptr, tab, lim - ptr);
648 if (newlim)
649 lim = newlim;
651 else
653 char *newlim;
654 newlim = ptr;
655 while (newlim < lim && blanks[UCHAR (*newlim)])
656 ++newlim;
657 while (newlim < lim && !blanks[UCHAR (*newlim)])
658 ++newlim;
659 lim = newlim;
661 #endif
663 /* If we're skipping leading blanks, don't start counting characters
664 until after skipping past any leading blanks. */
665 if (key->skipsblanks)
666 while (ptr < lim && blanks[UCHAR (*ptr)])
667 ++ptr;
669 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
670 if (ptr + echar <= lim)
671 ptr += echar;
672 else
673 ptr = lim;
675 return ptr;
678 /* FIXME */
680 void
681 trim_trailing_blanks (const char *a_start, char **a_end)
683 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
684 --(*a_end);
687 /* Find the lines in BUF, storing pointers and lengths in LINES.
688 Also replace newlines in BUF with NULs. */
690 static void
691 findlines (struct buffer *buf, struct lines *lines)
693 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
694 struct keyfield *key = keyhead.next;
696 lines->used = 0;
698 while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))
699 && lines->used < lines->limit)
701 /* There are various places in the code that rely on a NUL
702 being at the end of in-core lines; NULs inside the lines
703 will not cause trouble, though. */
704 *ptr = '\0';
706 if (lines->used == lines->alloc)
708 lines->alloc *= 2;
709 lines->lines = (struct line *)
710 xrealloc ((char *) lines->lines,
711 lines->alloc * sizeof (struct line));
714 lines->lines[lines->used].text = beg;
715 lines->lines[lines->used].length = ptr - beg;
717 /* Precompute the position of the first key for efficiency. */
718 if (key)
720 if (key->eword >= 0)
721 lines->lines[lines->used].keylim =
722 limfield (&lines->lines[lines->used], key);
723 else
724 lines->lines[lines->used].keylim = ptr;
726 if (key->sword >= 0)
727 lines->lines[lines->used].keybeg =
728 begfield (&lines->lines[lines->used], key);
729 else
731 if (key->skipsblanks)
732 while (blanks[UCHAR (*beg)])
733 ++beg;
734 lines->lines[lines->used].keybeg = beg;
736 if (key->skipeblanks)
738 trim_trailing_blanks (lines->lines[lines->used].keybeg,
739 &lines->lines[lines->used].keylim);
742 else
744 lines->lines[lines->used].keybeg = 0;
745 lines->lines[lines->used].keylim = 0;
748 ++lines->used;
749 beg = ptr + 1;
752 buf->left = lim - beg;
755 /* Compare strings A and B containing decimal fractions < 1. Each string
756 should begin with a decimal point followed immediately by the digits
757 of the fraction. Strings not of this form are considered to be zero. */
759 static int
760 fraccompare (register const char *a, register const char *b)
762 register tmpa = UCHAR (*a), tmpb = UCHAR (*b);
764 if (tmpa == '.' && tmpb == '.')
767 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
768 while (tmpa == tmpb && digits[tmpa]);
769 if (digits[tmpa] && digits[tmpb])
770 return tmpa - tmpb;
771 if (digits[tmpa])
773 while (tmpa == '0')
774 tmpa = UCHAR (*++a);
775 if (digits[tmpa])
776 return 1;
777 return 0;
779 if (digits[tmpb])
781 while (tmpb == '0')
782 tmpb = UCHAR (*++b);
783 if (digits[tmpb])
784 return -1;
785 return 0;
787 return 0;
789 else if (tmpa == '.')
792 tmpa = UCHAR (*++a);
793 while (tmpa == '0');
794 if (digits[tmpa])
795 return 1;
796 return 0;
798 else if (tmpb == '.')
801 tmpb = UCHAR (*++b);
802 while (tmpb == '0');
803 if (digits[tmpb])
804 return -1;
805 return 0;
807 return 0;
810 /* Compare strings A and B as numbers without explicitly converting them to
811 machine numbers. Comparatively slow for short strings, but asymptotically
812 hideously fast. */
814 static int
815 numcompare (register const char *a, register const char *b)
817 register int tmpa, tmpb, loga, logb, tmp;
819 tmpa = UCHAR (*a);
820 tmpb = UCHAR (*b);
822 while (blanks[tmpa])
823 tmpa = UCHAR (*++a);
824 while (blanks[tmpb])
825 tmpb = UCHAR (*++b);
827 if (tmpa == '-')
830 tmpa = UCHAR (*++a);
831 while (tmpa == '0');
832 if (tmpb != '-')
834 if (tmpa == '.')
836 tmpa = UCHAR (*++a);
837 while (tmpa == '0');
838 if (digits[tmpa])
839 return -1;
840 while (tmpb == '0')
841 tmpb = UCHAR (*++b);
842 if (tmpb == '.')
844 tmpb = *++b;
845 while (tmpb == '0');
846 if (digits[tmpb])
847 return -1;
848 return 0;
851 tmpb = UCHAR (*++b);
852 while (tmpb == '0');
854 while (tmpa == tmpb && digits[tmpa])
855 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
857 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
858 return -fraccompare (a, b);
860 if (digits[tmpa])
861 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
863 else
864 loga = 0;
866 if (digits[tmpb])
867 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
869 else
870 logb = 0;
872 if ((tmp = logb - loga) != 0)
873 return tmp;
875 if (!loga)
876 return 0;
878 return tmpb - tmpa;
880 else if (tmpb == '-')
883 tmpb = UCHAR (*++b);
884 while (tmpb == '0');
885 if (tmpb == '.')
887 tmpb = *++b;
888 while (tmpb == '0');
889 if (digits[tmpb])
890 return 1;
891 while (tmpa == '0')
892 tmpa = UCHAR (*++a);
893 if (tmpa == '.')
895 tmpa = UCHAR (*++a);
896 while (tmpa == '0');
897 if (digits[tmpa])
898 return 1;
899 return 0;
901 else
903 while (tmpa == '0')
904 tmpa = UCHAR (*++a);
905 while (tmpb == '0')
906 tmpb = UCHAR (*++b);
908 while (tmpa == tmpb && digits[tmpa])
909 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
911 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
912 return fraccompare (a, b);
914 if (digits[tmpa])
915 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
917 else
918 loga = 0;
920 if (digits[tmpb])
921 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
923 else
924 logb = 0;
926 if ((tmp = loga - logb) != 0)
927 return tmp;
929 if (!loga)
930 return 0;
932 return tmpa - tmpb;
936 static int
937 general_numcompare (const char *sa, const char *sb)
939 double a, b;
940 /* FIXME: add option to warn about failed conversions. */
941 /* FIXME: maybe add option to try expensive FP conversion
942 only if A and B can't be compared more cheaply/accurately. */
943 if (xstrtod (sa, NULL, &a))
945 a = 0;
947 if (xstrtod (sb, NULL, &b))
949 b = 0;
951 return a == b ? 0 : a < b ? -1 : 1;
954 /* Return an integer <= 12 associated with month name S with length LEN,
955 0 if the name in S is not recognized. */
957 static int
958 getmonth (const char *s, int len)
960 char month[4];
961 register int i, lo = 0, hi = 12;
963 while (len > 0 && blanks[UCHAR(*s)])
964 ++s, --len;
966 if (len < 3)
967 return 0;
969 for (i = 0; i < 3; ++i)
970 month[i] = fold_toupper[UCHAR (s[i])];
971 month[3] = '\0';
973 while (hi - lo > 1)
974 if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
975 hi = (lo + hi) / 2;
976 else
977 lo = (lo + hi) / 2;
978 if (!strcmp (month, monthtab[lo].name))
979 return monthtab[lo].val;
980 return 0;
983 /* Compare two lines A and B trying every key in sequence until there
984 are no more keys or a difference is found. */
986 static int
987 keycompare (const struct line *a, const struct line *b)
989 register char *texta, *textb, *lima, *limb, *translate;
990 register int *ignore;
991 struct keyfield *key;
992 int diff = 0, iter = 0, lena, lenb;
994 for (key = keyhead.next; key; key = key->next, ++iter)
996 ignore = key->ignore;
997 translate = key->translate;
999 /* Find the beginning and limit of each field. */
1000 if (iter || a->keybeg == NULL || b->keybeg == NULL)
1002 if (key->eword >= 0)
1003 lima = limfield (a, key), limb = limfield (b, key);
1004 else
1005 lima = a->text + a->length, limb = b->text + b->length;
1007 if (key->sword >= 0)
1008 texta = begfield (a, key), textb = begfield (b, key);
1009 else
1011 texta = a->text, textb = b->text;
1012 if (key->skipsblanks)
1014 while (texta < lima && blanks[UCHAR (*texta)])
1015 ++texta;
1016 while (textb < limb && blanks[UCHAR (*textb)])
1017 ++textb;
1021 else
1023 /* For the first iteration only, the key positions have
1024 been precomputed for us. */
1025 texta = a->keybeg, lima = a->keylim;
1026 textb = b->keybeg, limb = b->keylim;
1029 /* Find the lengths. */
1030 lena = lima - texta, lenb = limb - textb;
1031 if (lena < 0)
1032 lena = 0;
1033 if (lenb < 0)
1034 lenb = 0;
1036 if (key->skipeblanks)
1038 char *a_end = texta + lena;
1039 char *b_end = textb + lenb;
1040 trim_trailing_blanks (texta, &a_end);
1041 trim_trailing_blanks (textb, &b_end);
1042 lena = a_end - texta;
1043 lenb = b_end - textb;
1046 /* Actually compare the fields. */
1047 if (key->numeric)
1049 if (*lima || *limb)
1051 char savea = *lima, saveb = *limb;
1053 *lima = *limb = '\0';
1054 diff = numcompare (texta, textb);
1055 *lima = savea, *limb = saveb;
1057 else
1058 diff = numcompare (texta, textb);
1060 if (diff)
1061 return key->reverse ? -diff : diff;
1062 continue;
1064 else if (key->general_numeric)
1066 if (*lima || *limb)
1068 char savea = *lima, saveb = *limb;
1070 *lima = *limb = '\0';
1071 diff = general_numcompare (texta, textb);
1072 *lima = savea, *limb = saveb;
1074 else
1075 diff = general_numcompare (texta, textb);
1077 if (diff)
1078 return key->reverse ? -diff : diff;
1079 continue;
1081 else if (key->month)
1083 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1084 if (diff)
1085 return key->reverse ? -diff : diff;
1086 continue;
1088 else if (ignore && translate)
1090 #define CMP_WITH_IGNORE(A, B) \
1091 do \
1093 while (texta < lima && textb < limb) \
1095 while (texta < lima && ignore[UCHAR (*texta)]) \
1096 ++texta; \
1097 while (textb < limb && ignore[UCHAR (*textb)]) \
1098 ++textb; \
1099 if (texta < lima && textb < limb) \
1101 if ((A) != (B)) \
1103 diff = (A) - (B); \
1104 break; \
1106 ++texta; \
1107 ++textb; \
1110 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1111 diff = -1; \
1112 else if (texta < lima && textb == limb \
1113 && !ignore[UCHAR (*texta)]) \
1114 diff = 1; \
1117 if (diff == 0) \
1119 while (texta < lima && ignore[UCHAR (*texta)]) \
1120 ++texta; \
1121 while (textb < limb && ignore[UCHAR (*textb)]) \
1122 ++textb; \
1124 if (texta == lima && textb < limb) \
1125 diff = -1; \
1126 else if (texta < lima && textb == limb) \
1127 diff = 1; \
1129 /* Relative lengths are meaningless if characters were ignored. \
1130 Handling this case here avoids what might be an invalid length \
1131 comparison below. */ \
1132 if (diff == 0 && texta == lima && textb == limb) \
1133 return 0; \
1135 while (0)
1137 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1138 else if (ignore)
1139 CMP_WITH_IGNORE (*texta, *textb);
1140 else if (translate)
1141 while (texta < lima && textb < limb)
1143 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1145 diff = (translate[UCHAR (*--texta)]
1146 - translate[UCHAR (*--textb)]);
1147 break;
1150 else
1151 diff = memcmp (texta, textb, min (lena, lenb));
1153 if (diff)
1154 return key->reverse ? -diff : diff;
1155 if ((diff = lena - lenb) != 0)
1156 return key->reverse ? -diff : diff;
1159 return 0;
1162 /* Compare two lines A and B, returning negative, zero, or positive
1163 depending on whether A compares less than, equal to, or greater than B. */
1165 static int
1166 compare (register const struct line *a, register const struct line *b)
1168 int diff, tmpa, tmpb, mini;
1170 /* First try to compare on the specified keys (if any).
1171 The only two cases with no key at all are unadorned sort,
1172 and unadorned sort -r. */
1173 if (keyhead.next)
1175 diff = keycompare (a, b);
1176 if (diff != 0)
1177 return diff;
1178 if (unique || stable)
1179 return 0;
1182 /* If the keys all compare equal (or no keys were specified)
1183 fall through to the default byte-by-byte comparison. */
1184 tmpa = a->length, tmpb = b->length;
1185 mini = min (tmpa, tmpb);
1186 if (mini == 0)
1187 diff = tmpa - tmpb;
1188 else
1190 char *ap = a->text, *bp = b->text;
1192 diff = UCHAR (*ap) - UCHAR (*bp);
1193 if (diff == 0)
1195 diff = memcmp (ap, bp, mini);
1196 if (diff == 0)
1197 diff = tmpa - tmpb;
1201 return reverse ? -diff : diff;
1204 /* Check that the lines read from the given FP come in order. Return
1205 1 if they do and 0 if there is a disorder.
1206 FIXME: return number of first out-of-order line if not sorted. */
1208 static int
1209 checkfp (FILE *fp)
1211 struct buffer buf; /* Input buffer. */
1212 struct lines lines; /* Lines scanned from the buffer. */
1213 struct line temp; /* Copy of previous line. */
1214 int cc; /* Character count. */
1215 int alloc, sorted = 1;
1217 initbuf (&buf, mergealloc);
1218 initlines (&lines, mergealloc / linelength + 1,
1219 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1220 alloc = linelength;
1221 temp.text = xmalloc (alloc);
1223 cc = fillbuf (&buf, fp);
1224 if (cc == 0)
1225 goto finish;
1227 findlines (&buf, &lines);
1229 while (1)
1231 struct line *prev_line; /* Pointer to previous line. */
1232 int cmp; /* Result of calling compare. */
1233 int i;
1235 /* Compare each line in the buffer with its successor. */
1236 for (i = 0; i < lines.used - 1; ++i)
1238 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1239 if ((unique && cmp >= 0) || (cmp > 0))
1241 sorted = 0;
1242 goto finish;
1246 /* Save the last line of the buffer and refill the buffer. */
1247 prev_line = lines.lines + (lines.used - 1);
1248 if (prev_line->length > alloc)
1250 while (prev_line->length + 1 > alloc)
1251 alloc *= 2;
1252 temp.text = xrealloc (temp.text, alloc);
1254 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1255 temp.length = prev_line->length;
1256 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1257 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1259 cc = fillbuf (&buf, fp);
1260 if (cc == 0)
1261 break;
1263 findlines (&buf, &lines);
1264 /* Make sure the line saved from the old buffer contents is
1265 less than or equal to the first line of the new buffer. */
1266 cmp = compare (&temp, &lines.lines[0]);
1267 if ((unique && cmp >= 0) || (cmp > 0))
1269 sorted = 0;
1270 break;
1274 finish:
1275 xfclose (fp);
1276 free (buf.buf);
1277 free ((char *) lines.lines);
1278 free (temp.text);
1279 return sorted;
1282 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1283 Close FPS before returning. */
1285 static void
1286 mergefps (FILE **fps, register int nfps, FILE *ofp)
1288 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1289 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1290 struct line saved; /* Saved line for unique check. */
1291 int savedflag = 0; /* True if there is a saved line. */
1292 int savealloc; /* Size allocated for the saved line. */
1293 int cur[NMERGE]; /* Current line in each line table. */
1294 int ord[NMERGE]; /* Table representing a permutation of fps,
1295 such that lines[ord[0]].lines[cur[ord[0]]]
1296 is the smallest line and will be next
1297 output. */
1298 register int i, j, t;
1300 #ifdef lint /* Suppress `used before initialized' warning. */
1301 savealloc = 0;
1302 #endif
1304 /* Allocate space for a saved line if necessary. */
1305 if (unique)
1307 savealloc = linelength;
1308 saved.text = xmalloc (savealloc);
1311 /* Read initial lines from each input file. */
1312 for (i = 0; i < nfps; ++i)
1314 initbuf (&buffer[i], mergealloc);
1315 /* If a file is empty, eliminate it from future consideration. */
1316 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1318 xfclose (fps[i]);
1319 --nfps;
1320 for (j = i; j < nfps; ++j)
1321 fps[j] = fps[j + 1];
1323 if (i == nfps)
1324 free (buffer[i].buf);
1325 else
1327 initlines (&lines[i], mergealloc / linelength + 1,
1328 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1329 findlines (&buffer[i], &lines[i]);
1330 cur[i] = 0;
1334 /* Set up the ord table according to comparisons among input lines.
1335 Since this only reorders two items if one is strictly greater than
1336 the other, it is stable. */
1337 for (i = 0; i < nfps; ++i)
1338 ord[i] = i;
1339 for (i = 1; i < nfps; ++i)
1340 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1341 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1342 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1344 /* Repeatedly output the smallest line until no input remains. */
1345 while (nfps)
1347 /* If uniqified output is turned on, output only the first of
1348 an identical series of lines. */
1349 if (unique)
1351 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1353 write_bytes (saved.text, saved.length, ofp);
1354 putc (eolchar, ofp);
1355 savedflag = 0;
1357 if (!savedflag)
1359 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1361 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1362 savealloc *= 2;
1363 saved.text = xrealloc (saved.text, savealloc);
1365 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1366 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1367 saved.length + 1);
1368 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1370 saved.keybeg = saved.text +
1371 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1372 - lines[ord[0]].lines[cur[ord[0]]].text);
1374 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1376 saved.keylim = saved.text +
1377 (lines[ord[0]].lines[cur[ord[0]]].keylim
1378 - lines[ord[0]].lines[cur[ord[0]]].text);
1380 savedflag = 1;
1383 else
1385 write_bytes (lines[ord[0]].lines[cur[ord[0]]].text,
1386 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1387 putc (eolchar, ofp);
1390 /* Check if we need to read more lines into core. */
1391 if (++cur[ord[0]] == lines[ord[0]].used)
1392 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1394 findlines (&buffer[ord[0]], &lines[ord[0]]);
1395 cur[ord[0]] = 0;
1397 else
1399 /* We reached EOF on fps[ord[0]]. */
1400 for (i = 1; i < nfps; ++i)
1401 if (ord[i] > ord[0])
1402 --ord[i];
1403 --nfps;
1404 xfclose (fps[ord[0]]);
1405 free (buffer[ord[0]].buf);
1406 free ((char *) lines[ord[0]].lines);
1407 for (i = ord[0]; i < nfps; ++i)
1409 fps[i] = fps[i + 1];
1410 buffer[i] = buffer[i + 1];
1411 lines[i] = lines[i + 1];
1412 cur[i] = cur[i + 1];
1414 for (i = 0; i < nfps; ++i)
1415 ord[i] = ord[i + 1];
1416 continue;
1419 /* The new line just read in may be larger than other lines
1420 already in core; push it back in the queue until we encounter
1421 a line larger than it. */
1422 for (i = 1; i < nfps; ++i)
1424 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1425 &lines[ord[i]].lines[cur[ord[i]]]);
1426 if (!t)
1427 t = ord[0] - ord[i];
1428 if (t < 0)
1429 break;
1431 t = ord[0];
1432 for (j = 1; j < i; ++j)
1433 ord[j - 1] = ord[j];
1434 ord[i - 1] = t;
1437 if (unique && savedflag)
1439 write_bytes (saved.text, saved.length, ofp);
1440 putc (eolchar, ofp);
1441 free (saved.text);
1445 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1447 static void
1448 sortlines (struct line *lines, int nlines, struct line *temp)
1450 register struct line *lo, *hi, *t;
1451 register int nlo, nhi;
1453 if (nlines == 2)
1455 if (compare (&lines[0], &lines[1]) > 0)
1456 *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1457 return;
1460 nlo = nlines / 2;
1461 lo = lines;
1462 nhi = nlines - nlo;
1463 hi = lines + nlo;
1465 if (nlo > 1)
1466 sortlines (lo, nlo, temp);
1468 if (nhi > 1)
1469 sortlines (hi, nhi, temp);
1471 t = temp;
1473 while (nlo && nhi)
1474 if (compare (lo, hi) <= 0)
1475 *t++ = *lo++, --nlo;
1476 else
1477 *t++ = *hi++, --nhi;
1478 while (nlo--)
1479 *t++ = *lo++;
1481 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1482 *lo++ = *t++;
1485 /* Check that each of the NFILES FILES is ordered.
1486 Return a count of disordered files. */
1488 static int
1489 check (char **files, int nfiles)
1491 int i, disorders = 0;
1492 FILE *fp;
1494 for (i = 0; i < nfiles; ++i)
1496 fp = xfopen (files[i], "r");
1497 if (!checkfp (fp))
1499 fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]);
1500 ++disorders;
1503 return disorders;
1506 /* Merge NFILES FILES onto OFP. */
1508 static void
1509 merge (char **files, int nfiles, FILE *ofp)
1511 int i, j, t;
1512 char *temp;
1513 FILE *fps[NMERGE], *tfp;
1515 while (nfiles > NMERGE)
1517 t = 0;
1518 for (i = 0; i < nfiles / NMERGE; ++i)
1520 for (j = 0; j < NMERGE; ++j)
1521 fps[j] = xfopen (files[i * NMERGE + j], "r");
1522 tfp = xtmpfopen (temp = tempname ());
1523 mergefps (fps, NMERGE, tfp);
1524 xfclose (tfp);
1525 for (j = 0; j < NMERGE; ++j)
1526 zaptemp (files[i * NMERGE + j]);
1527 files[t++] = temp;
1529 for (j = 0; j < nfiles % NMERGE; ++j)
1530 fps[j] = xfopen (files[i * NMERGE + j], "r");
1531 tfp = xtmpfopen (temp = tempname ());
1532 mergefps (fps, nfiles % NMERGE, tfp);
1533 xfclose (tfp);
1534 for (j = 0; j < nfiles % NMERGE; ++j)
1535 zaptemp (files[i * NMERGE + j]);
1536 files[t++] = temp;
1537 nfiles = t;
1540 for (i = 0; i < nfiles; ++i)
1541 fps[i] = xfopen (files[i], "r");
1542 mergefps (fps, i, ofp);
1543 for (i = 0; i < nfiles; ++i)
1544 zaptemp (files[i]);
1547 /* Sort NFILES FILES onto OFP. */
1549 static void
1550 sort (char **files, int nfiles, FILE *ofp)
1552 struct buffer buf;
1553 struct lines lines;
1554 struct line *tmp;
1555 int i, ntmp;
1556 FILE *fp, *tfp;
1557 struct tempnode *node;
1558 int n_temp_files = 0;
1559 char **tempfiles;
1561 initbuf (&buf, sortalloc);
1562 initlines (&lines, sortalloc / linelength + 1,
1563 LINEALLOC / sizeof (struct line));
1564 ntmp = lines.alloc;
1565 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1567 while (nfiles--)
1569 fp = xfopen (*files++, "r");
1570 while (fillbuf (&buf, fp))
1572 findlines (&buf, &lines);
1573 if (lines.used > ntmp)
1575 while (lines.used > ntmp)
1576 ntmp *= 2;
1577 tmp = (struct line *)
1578 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1580 sortlines (lines.lines, lines.used, tmp);
1581 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1582 tfp = ofp;
1583 else
1585 ++n_temp_files;
1586 tfp = xtmpfopen (tempname ());
1588 for (i = 0; i < lines.used; ++i)
1589 if (!unique || i == 0
1590 || compare (&lines.lines[i], &lines.lines[i - 1]))
1592 write_bytes (lines.lines[i].text, lines.lines[i].length, tfp);
1593 putc (eolchar, tfp);
1595 if (tfp != ofp)
1596 xfclose (tfp);
1598 xfclose (fp);
1601 free (buf.buf);
1602 free ((char *) lines.lines);
1603 free ((char *) tmp);
1605 if (n_temp_files)
1607 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1608 i = n_temp_files;
1609 for (node = temphead.next; i > 0; node = node->next)
1610 tempfiles[--i] = node->name;
1611 merge (tempfiles, n_temp_files, ofp);
1612 free ((char *) tempfiles);
1616 /* Insert key KEY at the end of the list (`keyhead'). */
1618 static void
1619 insertkey (struct keyfield *key)
1621 struct keyfield *k = &keyhead;
1623 while (k->next)
1624 k = k->next;
1625 k->next = key;
1626 key->next = NULL;
1629 static void
1630 badfieldspec (const char *s)
1632 error (SORT_FAILURE, 0, _("invalid field specification `%s'"), s);
1635 /* Handle interrupts and hangups. */
1637 static void
1638 sighandler (int sig)
1640 #ifdef SA_INTERRUPT
1641 struct sigaction sigact;
1643 sigact.sa_handler = SIG_DFL;
1644 sigemptyset (&sigact.sa_mask);
1645 sigact.sa_flags = 0;
1646 sigaction (sig, &sigact, NULL);
1647 #else /* !SA_INTERRUPT */
1648 signal (sig, SIG_DFL);
1649 #endif /* SA_INTERRUPT */
1650 cleanup ();
1651 kill (getpid (), sig);
1654 /* Set the ordering options for KEY specified in S.
1655 Return the address of the first character in S that
1656 is not a valid ordering option.
1657 BLANKTYPE is the kind of blanks that 'b' should skip. */
1659 static char *
1660 set_ordering (register const char *s, struct keyfield *key,
1661 enum blanktype blanktype)
1663 while (*s)
1665 switch (*s)
1667 case 'b':
1668 if (blanktype == bl_start || blanktype == bl_both)
1669 key->skipsblanks = 1;
1670 if (blanktype == bl_end || blanktype == bl_both)
1671 key->skipeblanks = 1;
1672 break;
1673 case 'd':
1674 key->ignore = nondictionary;
1675 break;
1676 case 'f':
1677 key->translate = fold_toupper;
1678 break;
1679 case 'g':
1680 key->general_numeric = 1;
1681 break;
1682 case 'i':
1683 key->ignore = nonprinting;
1684 break;
1685 case 'M':
1686 key->month = 1;
1687 break;
1688 case 'n':
1689 key->numeric = 1;
1690 if (blanktype == bl_start || blanktype == bl_both)
1691 key->skipsblanks = 1;
1692 if (blanktype == bl_end || blanktype == bl_both)
1693 key->skipeblanks = 1;
1694 break;
1695 case 'r':
1696 key->reverse = 1;
1697 break;
1698 default:
1699 return (char *) s;
1701 ++s;
1703 return (char *) s;
1707 main (int argc, char **argv)
1709 struct keyfield *key = NULL, gkey;
1710 char *s;
1711 int i, t, t2;
1712 int checkonly = 0, mergeonly = 0, nfiles = 0;
1713 char *minus = "-", *outfile = minus, **files, *tmp;
1714 FILE *ofp;
1715 #ifdef SA_INTERRUPT
1716 struct sigaction oldact, newact;
1717 #endif /* SA_INTERRUPT */
1719 program_name = argv[0];
1720 setlocale (LC_ALL, "");
1721 bindtextdomain (PACKAGE, LOCALEDIR);
1722 textdomain (PACKAGE);
1724 parse_long_options (argc, argv, "sort", PACKAGE_VERSION, usage);
1726 have_read_stdin = 0;
1727 inittables ();
1729 temp_file_prefix = getenv ("TMPDIR");
1730 if (temp_file_prefix == NULL)
1731 temp_file_prefix = DEFAULT_TMPDIR;
1733 #ifdef SA_INTERRUPT
1734 newact.sa_handler = sighandler;
1735 sigemptyset (&newact.sa_mask);
1736 newact.sa_flags = 0;
1738 sigaction (SIGINT, NULL, &oldact);
1739 if (oldact.sa_handler != SIG_IGN)
1740 sigaction (SIGINT, &newact, NULL);
1741 sigaction (SIGHUP, NULL, &oldact);
1742 if (oldact.sa_handler != SIG_IGN)
1743 sigaction (SIGHUP, &newact, NULL);
1744 sigaction (SIGPIPE, NULL, &oldact);
1745 if (oldact.sa_handler != SIG_IGN)
1746 sigaction (SIGPIPE, &newact, NULL);
1747 sigaction (SIGTERM, NULL, &oldact);
1748 if (oldact.sa_handler != SIG_IGN)
1749 sigaction (SIGTERM, &newact, NULL);
1750 #else /* !SA_INTERRUPT */
1751 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1752 signal (SIGINT, sighandler);
1753 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1754 signal (SIGHUP, sighandler);
1755 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1756 signal (SIGPIPE, sighandler);
1757 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1758 signal (SIGTERM, sighandler);
1759 #endif /* !SA_INTERRUPT */
1761 gkey.sword = gkey.eword = -1;
1762 gkey.ignore = NULL;
1763 gkey.translate = NULL;
1764 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1765 gkey.skipsblanks = gkey.skipeblanks = 0;
1767 files = (char **) xmalloc (sizeof (char *) * argc);
1769 for (i = 1; i < argc; ++i)
1771 if (argv[i][0] == '+')
1773 if (key)
1774 insertkey (key);
1775 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1776 key->eword = -1;
1777 key->ignore = NULL;
1778 key->translate = NULL;
1779 key->skipsblanks = key->skipeblanks = 0;
1780 key->numeric = key->general_numeric = key->month = key->reverse = 0;
1781 s = argv[i] + 1;
1782 if (! (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])])))
1783 badfieldspec (argv[i]);
1784 for (t = 0; digits[UCHAR (*s)]; ++s)
1785 t = 10 * t + *s - '0';
1786 t2 = 0;
1787 if (*s == '.')
1788 for (++s; digits[UCHAR (*s)]; ++s)
1789 t2 = 10 * t2 + *s - '0';
1790 if (t2 || t)
1792 key->sword = t;
1793 key->schar = t2;
1795 else
1796 key->sword = -1;
1797 s = set_ordering (s, key, bl_start);
1798 if (*s)
1799 badfieldspec (argv[i]);
1801 else if (argv[i][0] == '-' && argv[i][1])
1803 s = argv[i] + 1;
1804 if (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])]))
1806 if (!key)
1808 /* Provoke with `sort -9'. */
1809 error (0, 0, _("when using the old-style +POS and -POS \
1810 key specifiers,\nthe +POS specifier must come first"));
1811 usage (SORT_FAILURE);
1813 for (t = 0; digits[UCHAR (*s)]; ++s)
1814 t = t * 10 + *s - '0';
1815 t2 = 0;
1816 if (*s == '.')
1817 for (++s; digits[UCHAR (*s)]; ++s)
1818 t2 = t2 * 10 + *s - '0';
1819 key->eword = t;
1820 key->echar = t2;
1821 s = set_ordering (s, key, bl_end);
1822 if (*s)
1823 badfieldspec (argv[i]);
1824 insertkey (key);
1825 key = NULL;
1827 else
1828 while (*s)
1830 s = set_ordering (s, &gkey, bl_both);
1831 switch (*s)
1833 case '\0':
1834 break;
1835 case 'c':
1836 checkonly = 1;
1837 break;
1838 case 'k':
1839 if (s[1])
1840 ++s;
1841 else
1843 if (i == argc - 1)
1844 error (SORT_FAILURE, 0,
1845 _("option `-k' requires an argument"));
1846 else
1847 s = argv[++i];
1849 if (key)
1850 insertkey (key);
1851 key = (struct keyfield *)
1852 xmalloc (sizeof (struct keyfield));
1853 key->eword = -1;
1854 key->ignore = NULL;
1855 key->translate = NULL;
1856 key->skipsblanks = key->skipeblanks = 0;
1857 key->numeric = key->month = key->reverse = 0;
1858 /* Get POS1. */
1859 if (!digits[UCHAR (*s)])
1860 badfieldspec (argv[i]);
1861 for (t = 0; digits[UCHAR (*s)]; ++s)
1862 t = 10 * t + *s - '0';
1863 if (t == 0)
1865 /* Provoke with `sort -k0' */
1866 error (0, 0, _("the starting field number argument \
1867 to the `-k' option must be positive"));
1868 badfieldspec (argv[i]);
1870 --t;
1871 t2 = 0;
1872 if (*s == '.')
1874 if (!digits[UCHAR (s[1])])
1876 /* Provoke with `sort -k1.' */
1877 error (0, 0, _("starting field spec has `.' but \
1878 lacks following character offset"));
1879 badfieldspec (argv[i]);
1881 for (++s; digits[UCHAR (*s)]; ++s)
1882 t2 = 10 * t2 + *s - '0';
1883 if (t2 == 0)
1885 /* Provoke with `sort -k1.0' */
1886 error (0, 0, _("starting field character offset \
1887 argument to the `-k' option\nmust be positive"));
1888 badfieldspec (argv[i]);
1890 --t2;
1892 if (t2 || t)
1894 key->sword = t;
1895 key->schar = t2;
1897 else
1898 key->sword = -1;
1899 s = set_ordering (s, key, bl_start);
1900 if (*s == 0)
1902 key->eword = -1;
1903 key->echar = 0;
1905 else if (*s != ',')
1906 badfieldspec (argv[i]);
1907 else if (*s == ',')
1909 /* Skip over comma. */
1910 ++s;
1911 if (*s == 0)
1913 /* Provoke with `sort -k1,' */
1914 error (0, 0, _("field specification has `,' but \
1915 lacks following field spec"));
1916 badfieldspec (argv[i]);
1918 /* Get POS2. */
1919 for (t = 0; digits[UCHAR (*s)]; ++s)
1920 t = t * 10 + *s - '0';
1921 if (t == 0)
1923 /* Provoke with `sort -k1,0' */
1924 error (0, 0, _("ending field number argument \
1925 to the `-k' option must be positive"));
1926 badfieldspec (argv[i]);
1928 --t;
1929 t2 = 0;
1930 if (*s == '.')
1932 if (!digits[UCHAR (s[1])])
1934 /* Provoke with `sort -k1,1.' */
1935 error (0, 0, _("ending field spec has `.' \
1936 but lacks following character offset"));
1937 badfieldspec (argv[i]);
1939 for (++s; digits[UCHAR (*s)]; ++s)
1940 t2 = t2 * 10 + *s - '0';
1942 else
1944 /* `-k 2,3' is equivalent to `+1 -3'. */
1945 ++t;
1947 key->eword = t;
1948 key->echar = t2;
1949 s = set_ordering (s, key, bl_end);
1950 if (*s)
1951 badfieldspec (argv[i]);
1953 insertkey (key);
1954 key = NULL;
1955 goto outer;
1956 case 'm':
1957 mergeonly = 1;
1958 break;
1959 case 'o':
1960 if (s[1])
1961 outfile = s + 1;
1962 else
1964 if (i == argc - 1)
1965 error (SORT_FAILURE, 0,
1966 _("option `-o' requires an argument"));
1967 else
1968 outfile = argv[++i];
1970 goto outer;
1971 case 's':
1972 stable = 1;
1973 break;
1974 case 't':
1975 if (s[1])
1976 tab = *++s;
1977 else if (i < argc - 1)
1979 tab = *argv[++i];
1980 goto outer;
1982 else
1983 error (SORT_FAILURE, 0,
1984 _("option `-t' requires an argument"));
1985 break;
1986 case 'T':
1987 if (s[1])
1988 temp_file_prefix = ++s;
1989 else
1991 if (i < argc - 1)
1992 temp_file_prefix = argv[++i];
1993 else
1994 error (SORT_FAILURE, 0,
1995 _("option `-T' requires an argument"));
1997 goto outer;
1998 /* break; */
1999 case 'u':
2000 unique = 1;
2001 break;
2002 case 'z':
2003 eolchar = 0;
2004 break;
2005 case 'y':
2006 /* Accept and ignore e.g. -y0 for compatibility with
2007 Solaris 2. */
2008 goto outer;
2009 default:
2010 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2011 argv[0], *s);
2012 usage (SORT_FAILURE);
2014 if (*s)
2015 ++s;
2018 else /* Not an option. */
2020 files[nfiles++] = argv[i];
2022 outer:;
2025 if (key)
2026 insertkey (key);
2028 /* Inheritance of global options to individual keys. */
2029 for (key = keyhead.next; key; key = key->next)
2030 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2031 && !key->skipeblanks && !key->month && !key->numeric
2032 && !key->general_numeric)
2034 key->ignore = gkey.ignore;
2035 key->translate = gkey.translate;
2036 key->skipsblanks = gkey.skipsblanks;
2037 key->skipeblanks = gkey.skipeblanks;
2038 key->month = gkey.month;
2039 key->numeric = gkey.numeric;
2040 key->general_numeric = gkey.general_numeric;
2041 key->reverse = gkey.reverse;
2044 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2045 || gkey.skipeblanks || gkey.month || gkey.numeric
2046 || gkey.general_numeric))
2047 insertkey (&gkey);
2048 reverse = gkey.reverse;
2050 if (nfiles == 0)
2052 nfiles = 1;
2053 files = &minus;
2056 if (checkonly)
2058 /* POSIX requires that sort return 1 IFF invoked with -c and the
2059 input is not properly sorted. */
2060 exit (check (files, nfiles) == 0 ? 0 : 1);
2063 if (strcmp (outfile, "-"))
2065 struct stat outstat;
2066 if (stat (outfile, &outstat) == 0)
2068 /* The following code prevents a race condition when
2069 people use the brain dead shell programming idiom:
2070 cat file | sort -o file
2071 This feature is provided for historical compatibility,
2072 but we strongly discourage ever relying on this in
2073 new shell programs. */
2075 /* Temporarily copy each input file that might be another name
2076 for the output file. When in doubt (e.g. a pipe), copy. */
2077 for (i = 0; i < nfiles; ++i)
2079 char buf[8192];
2080 FILE *fp;
2081 int cc;
2083 if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
2085 struct stat instat;
2086 if ((strcmp (files[i], "-")
2087 ? stat (files[i], &instat)
2088 : fstat (STDIN_FILENO, &instat)) != 0)
2090 error (0, errno, "%s", files[i]);
2091 cleanup ();
2092 exit (SORT_FAILURE);
2094 if (S_ISREG (instat.st_mode)
2095 && (instat.st_ino != outstat.st_ino
2096 || instat.st_dev != outstat.st_dev))
2098 /* We know the files are distinct. */
2099 continue;
2103 fp = xfopen (files[i], "r");
2104 tmp = tempname ();
2105 ofp = xtmpfopen (tmp);
2106 while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
2107 write_bytes (buf, cc, ofp);
2108 if (ferror (fp))
2110 error (0, errno, "%s", files[i]);
2111 cleanup ();
2112 exit (SORT_FAILURE);
2114 xfclose (ofp);
2115 xfclose (fp);
2116 files[i] = tmp;
2119 ofp = xfopen (outfile, "w");
2121 else
2122 ofp = stdout;
2124 if (mergeonly)
2125 merge (files, nfiles, ofp);
2126 else
2127 sort (files, nfiles, ofp);
2128 cleanup ();
2130 /* If we wait for the implicit flush on exit, and the parent process
2131 has closed stdout (e.g., exec >&- in a shell), then the output file
2132 winds up empty. I don't understand why. This is under SunOS,
2133 Solaris, Ultrix, and Irix. This premature fflush makes the output
2134 reappear. --karl@cs.umb.edu */
2135 if (fflush (ofp) < 0)
2136 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2138 if (have_read_stdin && fclose (stdin) == EOF)
2139 error (SORT_FAILURE, errno, outfile);
2140 if (ferror (stdout) || fclose (stdout) == EOF)
2141 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2143 exit (EXIT_SUCCESS);