*** empty log message ***
[coreutils.git] / src / sort.c
blobeb620803f21aee432bb35d76b1362fc300cecf04
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 1991-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
24 #include <config.h>
26 #include <sys/types.h>
27 #include <signal.h>
28 #include <stdio.h>
29 #include <assert.h>
30 #include "system.h"
31 #include "long-options.h"
32 #include "error.h"
33 #include "hard-locale.h"
34 #include "memcoll.h"
35 #include "xalloc.h"
37 /* The official name of this program (e.g., no `g' prefix). */
38 #define PROGRAM_NAME "sort"
40 #define AUTHORS "Mike Haertel"
42 #if defined ENABLE_NLS && HAVE_LANGINFO_H
43 # include <langinfo.h>
44 #endif
46 #if HAVE_PATHCONF && defined _PC_NAME_MAX
47 # define NAME_MAX_IN_DIR(Dir) pathconf (Dir, _PC_NAME_MAX)
48 #else
49 # define NAME_MAX_IN_DIR(Dir) 255
50 #endif
52 #ifndef STDC_HEADERS
53 double strtod ();
54 #endif
56 char *xstrdup ();
58 /* Undefine, to avoid warning about redefinition on some systems. */
59 #undef min
60 #define min(a, b) ((a) < (b) ? (a) : (b))
61 #undef max
62 #define max(a, b) ((a) > (b) ? (a) : (b))
64 #define UCHAR_LIM (UCHAR_MAX + 1)
65 #define UCHAR(c) ((unsigned char) (c))
67 #ifndef DEFAULT_TMPDIR
68 # define DEFAULT_TMPDIR "/tmp"
69 #endif
71 /* Use this as exit status in case of error, not EXIT_FAILURE. This
72 is necessary because EXIT_FAILURE is usually 1 and POSIX requires
73 that sort exit with status 1 IFF invoked with -c and the input is
74 not properly sorted. Any other irregular exit must exit with a
75 status code greater than 1. */
76 #define SORT_FAILURE 2
78 #define C_DECIMAL_POINT '.'
79 #define NEGATION_SIGN '-'
80 #define NUMERIC_ZERO '0'
82 #ifdef ENABLE_NLS
84 static char decimal_point;
85 static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
87 /* Nonzero if the corresponding locales are hard. */
88 static int hard_LC_COLLATE;
89 static int hard_LC_CTYPE;
90 # if HAVE_NL_LANGINFO
91 static int hard_LC_TIME;
92 # endif
94 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)
96 #else
98 # define decimal_point C_DECIMAL_POINT
99 # define IS_THOUSANDS_SEP(x) 0
101 #endif
103 /* The kind of blanks for '-b' to skip in various options. */
104 enum blanktype { bl_start, bl_end, bl_both };
106 /* The character marking end of line. Default to \n. */
107 int eolchar = '\n';
109 /* Lines are held in core as counted strings. */
110 struct line
112 char *text; /* Text of the line. */
113 int length; /* Length including final newline. */
114 char *keybeg; /* Start of first key. */
115 char *keylim; /* Limit of first key. */
118 /* Arrays of lines. */
119 struct lines
121 struct line *lines; /* Dynamically allocated array of lines. */
122 int used; /* Number of slots used. */
123 int alloc; /* Number of slots allocated. */
124 int limit; /* Max number of slots to allocate. */
127 /* Input buffers. */
128 struct buffer
130 char *buf; /* Dynamically allocated buffer. */
131 int used; /* Number of bytes used. */
132 int alloc; /* Number of bytes allocated. */
133 int left; /* Number of bytes left after line parsing. */
136 struct keyfield
138 int sword; /* Zero-origin 'word' to start at. */
139 int schar; /* Additional characters to skip. */
140 int skipsblanks; /* Skip leading white space at start. */
141 int eword; /* Zero-origin first word after field. */
142 int echar; /* Additional characters in field. */
143 int skipeblanks; /* Skip trailing white space at finish. */
144 int *ignore; /* Boolean array of characters to ignore. */
145 char *translate; /* Translation applied to characters. */
146 int numeric; /* Flag for numeric comparison. Handle
147 strings of digits with optional decimal
148 point, but no exponential notation. */
149 int general_numeric; /* Flag for general, numeric comparison.
150 Handle numbers in exponential notation. */
151 int month; /* Flag for comparison by month name. */
152 int reverse; /* Reverse the sense of comparison. */
153 struct keyfield *next; /* Next keyfield to try. */
156 struct month
158 char *name;
159 int val;
162 /* The name this program was run with. */
163 char *program_name;
165 /* Table of white space. */
166 static int blanks[UCHAR_LIM];
168 /* Table of non-printing characters. */
169 static int nonprinting[UCHAR_LIM];
171 /* Table of non-dictionary characters (not letters, digits, or blanks). */
172 static int nondictionary[UCHAR_LIM];
174 /* Translation table folding lower case to upper.
175 FIXME: This doesn't work with multibyte character sets. */
176 static char fold_toupper[UCHAR_LIM];
178 #define MONTHS_PER_YEAR 12
180 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
181 # define MONTHTAB_CONST /* empty */
182 #else
183 # define MONTHTAB_CONST const
184 #endif
186 /* Table mapping month names to integers.
187 Alphabetic order allows binary search. */
188 static MONTHTAB_CONST struct month monthtab[] =
190 {"APR", 4},
191 {"AUG", 8},
192 {"DEC", 12},
193 {"FEB", 2},
194 {"JAN", 1},
195 {"JUL", 7},
196 {"JUN", 6},
197 {"MAR", 3},
198 {"MAY", 5},
199 {"NOV", 11},
200 {"OCT", 10},
201 {"SEP", 9}
204 /* During the merge phase, the number of files to merge at once. */
205 #define NMERGE 16
207 /* Initial buffer size for in core sorting. Will not grow unless a
208 line longer than this is seen. */
209 static int sortalloc = 512 * 1024 + 1;
211 /* Initial buffer size for in core merge buffers. Bear in mind that
212 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
213 static int mergealloc = 16 * 1024 + 1;
215 /* Guess of average line length. */
216 static int linelength = 30;
218 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
219 #define LINEALLOC (256 * 1024)
221 /* Directory in which any temporary files are to be created. */
222 static char *temp_dir;
224 /* Flag to reverse the order of all comparisons. */
225 static int reverse;
227 /* Flag for stable sort. This turns off the last ditch bytewise
228 comparison of lines, and instead leaves lines in the same order
229 they were read if all keys compare equal. */
230 static int stable;
232 /* Tab character separating fields. If NUL, then fields are separated
233 by the empty string between a non-whitespace character and a whitespace
234 character. */
235 static char tab;
237 /* Flag to remove consecutive duplicate lines from the output.
238 Only the last of a sequence of equal lines will be output. */
239 static int unique;
241 /* Nonzero if any of the input files are the standard input. */
242 static int have_read_stdin;
244 /* Lists of key field comparisons to be tried. */
245 static struct keyfield keyhead;
247 void
248 usage (int status)
250 if (status != 0)
251 fprintf (stderr, _("Try `%s --help' for more information.\n"),
252 program_name);
253 else
255 printf (_("\
256 Usage: %s [OPTION]... [FILE]...\n\
258 program_name);
259 printf (_("\
260 Write sorted concatenation of all FILE(s) to standard output.\n\
262 +POS1 [-POS2] start a key at POS1, end it *before* POS2 (obsolescent)\n\
263 field numbers and character offsets are numbered\n\
264 starting with zero (contrast with the -k option)\n\
265 -b ignore leading blanks in sort fields or keys\n\
266 -c check if given files already sorted, do not sort\n\
267 -d consider only [a-zA-Z0-9 ] characters in keys\n\
268 -f fold lower case to upper case characters in keys\n\
269 -g compare according to general numerical value, imply -b\n\
270 -i consider only [\\040-\\0176] characters in keys\n\
271 -k POS1[,POS2] start a key at POS1, end it *at* POS2\n\
272 field numbers and character offsets are numbered\n\
273 starting with one (contrast with zero-based +POS form)\n\
274 -m merge already sorted files, do not sort\n\
275 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
276 -n compare according to string numerical value, imply -b\n\
277 -o FILE write result on FILE instead of standard output\n\
278 -r reverse the result of comparisons\n\
279 -s stabilize sort by disabling last resort comparison\n\
280 -t SEP use SEParator instead of non- to whitespace transition\n\
281 -T DIRECTORY use DIRECTORY for temporary files, not $TMPDIR or %s\n\
282 -u with -c, check for strict ordering;\n\
283 with -m, only output the first of an equal sequence\n\
284 -z end lines with 0 byte, not newline, for find -print0\n\
285 --help display this help and exit\n\
286 --version output version information and exit\n\
289 DEFAULT_TMPDIR);
290 printf (_("\
291 POS is F[.C][OPTS], where F is the field number and C the character position\n\
292 in the field, both counted from one with -k, from zero with the obsolescent\n\
293 form. OPTS is made up of one or more of Mbdfinr; this effectively disables\n\
294 global -Mbdfinr settings for that key. If no key is given, use the entire\n\
295 line as the key. With no FILE, or when FILE is -, read standard input.\n\
298 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
300 /* Don't use EXIT_FAILURE here in case it is defined to be 1.
301 POSIX requires that sort return 1 IFF invoked with -c and
302 the input is not properly sorted. */
303 assert (status == 0 || status == SORT_FAILURE);
304 exit (status);
307 /* The list of temporary files. */
308 static struct tempnode
310 char *name;
311 struct tempnode *next;
312 } temphead;
314 /* Clean up any remaining temporary files. */
316 static void
317 cleanup (void)
319 struct tempnode *node;
321 for (node = temphead.next; node; node = node->next)
322 unlink (node->name);
325 static FILE *
326 xtmpfopen (const char *file)
328 FILE *fp;
329 int fd;
331 /* Open temporary file exclusively, to foil a common
332 denial-of-service attack. */
333 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0600);
334 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
336 error (0, errno, "%s", file);
337 cleanup ();
338 exit (SORT_FAILURE);
341 return fp;
344 static FILE *
345 xfopen (const char *file, const char *how)
347 FILE *fp;
349 if (STREQ (file, "-"))
351 fp = stdin;
353 else
355 if ((fp = fopen (file, how)) == NULL)
357 error (0, errno, "%s", file);
358 cleanup ();
359 exit (SORT_FAILURE);
363 if (fp == stdin)
364 have_read_stdin = 1;
365 return fp;
368 static void
369 xfclose (FILE *fp)
371 if (fp == stdin)
373 /* Allow reading stdin from tty more than once. */
374 if (feof (fp))
375 clearerr (fp);
377 else if (fp == stdout)
379 if (fflush (fp) != 0)
381 error (0, errno, _("flushing file"));
382 cleanup ();
383 exit (SORT_FAILURE);
386 else
388 if (fclose (fp) != 0)
390 error (0, errno, _("error closing file"));
391 cleanup ();
392 exit (SORT_FAILURE);
397 static void
398 write_bytes (const char *buf, size_t n_bytes, FILE *fp)
400 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
402 error (0, errno, _("write error"));
403 cleanup ();
404 exit (SORT_FAILURE);
408 /* Return a name for a temporary file. */
410 static char *
411 tempname (void)
413 static unsigned int seq;
414 int len = strlen (temp_dir);
415 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
416 int long_file_names = NAME_MAX_IN_DIR (temp_dir) > 12;
417 struct tempnode *node;
419 /* If long filenames aren't supported, we cannot use filenames
420 longer than 8+3 and still assume they are unique. */
421 if (long_file_names)
422 sprintf (name,
423 "%s%ssort%5.5d%5.5d",
424 temp_dir,
425 (len && temp_dir[len - 1] != '/') ? "/" : "",
426 (unsigned int) getpid () & 0xffff, seq);
427 else
428 sprintf (name, "%s%ss%5.5d%2.2d.%3.3d",
429 temp_dir,
430 (len && temp_dir[len - 1] != '/') ? "/" : "",
431 (unsigned int) getpid () & 0xffff, seq / 1000, seq % 1000);
433 ++seq;
435 /* Make sure that SEQ's value fits in 5 digits if temp_dir is on
436 an 8.3 filesystem. */
437 if (!long_file_names && seq >= 100000)
438 seq = 0;
440 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
441 node->name = name;
442 node->next = temphead.next;
443 temphead.next = node;
444 return name;
447 /* Search through the list of temporary files for NAME;
448 remove it if it is found on the list. */
450 static void
451 zaptemp (const char *name)
453 struct tempnode *node, *temp;
455 for (node = &temphead; node->next; node = node->next)
456 if (STREQ (name, node->next->name))
457 break;
458 if (node->next)
460 temp = node->next;
461 unlink (temp->name);
462 free (temp->name);
463 node->next = temp->next;
464 free ((char *) temp);
468 #ifdef ENABLE_NLS
470 static int
471 struct_month_cmp (const void *m1, const void *m2)
473 return strcmp (((const struct month *) m1)->name,
474 ((const struct month *) m2)->name);
477 #endif /* NLS */
479 /* Initialize the character class tables. */
481 static void
482 inittables (void)
484 int i;
486 for (i = 0; i < UCHAR_LIM; ++i)
488 if (ISBLANK (i))
489 blanks[i] = 1;
490 if (!ISPRINT (i))
491 nonprinting[i] = 1;
492 if (!ISALNUM (i) && !ISBLANK (i))
493 nondictionary[i] = 1;
494 if (ISLOWER (i))
495 fold_toupper[i] = toupper (i);
496 else
497 fold_toupper[i] = i;
500 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
501 /* If we're not in the "C" locale, read different names for months. */
502 if (hard_LC_TIME)
504 for (i = 0; i < MONTHS_PER_YEAR; i++)
506 char *s;
507 size_t s_len;
508 size_t j;
509 char *name;
511 s = (char *) nl_langinfo (ABMON_1 + i);
512 s_len = strlen (s);
513 monthtab[i].name = name = (char *) xmalloc (s_len + 1);
514 monthtab[i].val = i + 1;
516 for (j = 0; j < s_len; j++)
517 name[j] = fold_toupper[UCHAR (s[j])];
518 name[j] = '\0';
520 qsort ((void *) monthtab, MONTHS_PER_YEAR,
521 sizeof (struct month), struct_month_cmp);
523 #endif /* NLS */
526 /* Initialize BUF, allocating ALLOC bytes initially. */
528 static void
529 initbuf (struct buffer *buf, int alloc)
531 buf->alloc = alloc;
532 buf->buf = xmalloc (buf->alloc);
533 buf->used = buf->left = 0;
536 /* Fill BUF reading from FP, moving buf->left bytes from the end
537 of buf->buf to the beginning first. If EOF is reached and the
538 file wasn't terminated by a newline, supply one. Always leave
539 at least one unused byte at the end. Return a count of bytes
540 buffered. */
542 static int
543 fillbuf (struct buffer *buf, FILE *fp)
545 int cc;
547 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
548 buf->used = buf->left;
550 while (!feof (fp) && (buf->used == 0
551 || !memchr (buf->buf, eolchar, buf->used)))
553 if (buf->used == buf->alloc - 1)
555 buf->alloc = buf->alloc * 2 - 1;
556 buf->buf = xrealloc (buf->buf, buf->alloc);
558 cc = fread (buf->buf + buf->used, 1, buf->alloc - 1 - buf->used, fp);
559 if (ferror (fp))
561 error (0, errno, _("read error"));
562 cleanup ();
563 exit (SORT_FAILURE);
565 buf->used += cc;
568 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
570 if (buf->used == buf->alloc - 1)
572 buf->alloc = buf->alloc * 2 - 1;
573 buf->buf = xrealloc (buf->buf, buf->alloc);
575 buf->buf[buf->used++] = eolchar;
578 return buf->used;
581 /* Initialize LINES, allocating space for ALLOC lines initially.
582 LIMIT is the maximum possible number of lines to allocate space
583 for, ever. */
585 static void
586 initlines (struct lines *lines, int alloc, int limit)
588 lines->alloc = alloc;
589 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
590 lines->used = 0;
591 lines->limit = limit;
594 /* Return a pointer to the first character of the field specified
595 by KEY in LINE. */
597 static char *
598 begfield (const struct line *line, const struct keyfield *key)
600 register char *ptr = line->text, *lim = ptr + line->length;
601 register int sword = key->sword, schar = key->schar;
603 if (tab)
604 while (ptr < lim && sword--)
606 while (ptr < lim && *ptr != tab)
607 ++ptr;
608 if (ptr < lim)
609 ++ptr;
611 else
612 while (ptr < lim && sword--)
614 while (ptr < lim && blanks[UCHAR (*ptr)])
615 ++ptr;
616 while (ptr < lim && !blanks[UCHAR (*ptr)])
617 ++ptr;
620 if (key->skipsblanks)
621 while (ptr < lim && blanks[UCHAR (*ptr)])
622 ++ptr;
624 if (ptr + schar <= lim)
625 ptr += schar;
626 else
627 ptr = lim;
629 return ptr;
632 /* Return the limit of (a pointer to the first character after) the field
633 in LINE specified by KEY. */
635 static char *
636 limfield (const struct line *line, const struct keyfield *key)
638 register char *ptr = line->text, *lim = ptr + line->length;
639 register int eword = key->eword, echar = key->echar;
641 /* Note: from the POSIX spec:
642 The leading field separator itself is included in
643 a field when -t is not used. FIXME: move this comment up... */
645 /* Move PTR past EWORD fields or to one past the last byte on LINE,
646 whichever comes first. If there are more than EWORD fields, leave
647 PTR pointing at the beginning of the field having zero-based index,
648 EWORD. If a delimiter character was specified (via -t), then that
649 `beginning' is the first character following the delimiting TAB.
650 Otherwise, leave PTR pointing at the first `blank' character after
651 the preceding field. */
652 if (tab)
653 while (ptr < lim && eword--)
655 while (ptr < lim && *ptr != tab)
656 ++ptr;
657 if (ptr < lim && (eword || echar > 0))
658 ++ptr;
660 else
661 while (ptr < lim && eword--)
663 while (ptr < lim && blanks[UCHAR (*ptr)])
664 ++ptr;
665 while (ptr < lim && !blanks[UCHAR (*ptr)])
666 ++ptr;
669 #ifdef POSIX_UNSPECIFIED
670 /* The following block of code makes GNU sort incompatible with
671 standard Unix sort, so it's ifdef'd out for now.
672 The POSIX spec isn't clear on how to interpret this.
673 FIXME: request clarification.
675 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
676 Date: Thu, 30 May 96 12:20:41 -0400
678 [...]I believe I've found another bug in `sort'.
680 $ cat /tmp/sort.in
681 a b c 2 d
682 pq rs 1 t
683 $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
684 a b c 2 d
685 pq rs 1 t
686 $ /bin/sort +0.6 -0.7 </tmp/sort.in
687 pq rs 1 t
688 a b c 2 d
690 Unix sort produced the answer I expected: sort on the single character
691 in column 6. GNU sort produced different results, because it disagrees
692 on the interpretation of the key-end spec "-M.N". Unix sort reads this
693 as "skip M fields, then N characters"; but GNU sort wants it to mean
694 "skip M fields, then either N characters or the rest of the current
695 field, whichever comes first". This extra clause applies only to
696 key-ends, not key-starts.
699 /* Make LIM point to the end of (one byte past) the current field. */
700 if (tab)
702 char *newlim;
703 newlim = memchr (ptr, tab, lim - ptr);
704 if (newlim)
705 lim = newlim;
707 else
709 char *newlim;
710 newlim = ptr;
711 while (newlim < lim && blanks[UCHAR (*newlim)])
712 ++newlim;
713 while (newlim < lim && !blanks[UCHAR (*newlim)])
714 ++newlim;
715 lim = newlim;
717 #endif
719 /* If we're skipping leading blanks, don't start counting characters
720 until after skipping past any leading blanks. */
721 if (key->skipsblanks)
722 while (ptr < lim && blanks[UCHAR (*ptr)])
723 ++ptr;
725 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
726 if (ptr + echar <= lim)
727 ptr += echar;
728 else
729 ptr = lim;
731 return ptr;
734 /* FIXME */
736 void
737 trim_trailing_blanks (const char *a_start, char **a_end)
739 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
740 --(*a_end);
743 /* Find the lines in BUF, storing pointers and lengths in LINES. */
745 static void
746 findlines (struct buffer *buf, struct lines *lines)
748 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
749 struct keyfield *key = keyhead.next;
751 lines->used = 0;
753 while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))
754 && lines->used < lines->limit)
756 if (lines->used == lines->alloc)
758 lines->alloc *= 2;
759 lines->lines = (struct line *)
760 xrealloc ((char *) lines->lines,
761 lines->alloc * sizeof (struct line));
764 ptr++;
765 lines->lines[lines->used].text = beg;
766 lines->lines[lines->used].length = ptr - beg;
768 /* Precompute the position of the first key for efficiency. */
769 if (key)
771 if (key->eword >= 0)
772 lines->lines[lines->used].keylim =
773 limfield (&lines->lines[lines->used], key);
774 else
775 lines->lines[lines->used].keylim = ptr;
777 if (key->sword >= 0)
778 lines->lines[lines->used].keybeg =
779 begfield (&lines->lines[lines->used], key);
780 else
782 if (key->skipsblanks)
783 while (blanks[UCHAR (*beg)])
784 ++beg;
785 lines->lines[lines->used].keybeg = beg;
787 if (key->skipeblanks)
789 trim_trailing_blanks (lines->lines[lines->used].keybeg,
790 &lines->lines[lines->used].keylim);
793 else
795 lines->lines[lines->used].keybeg = 0;
796 lines->lines[lines->used].keylim = 0;
799 ++lines->used;
800 beg = ptr;
803 buf->left = lim - beg;
806 /* Compare strings A and B containing decimal fractions < 1. Each string
807 should begin with a decimal point followed immediately by the digits
808 of the fraction. Strings not of this form are considered to be zero. */
810 /* The goal here, is to take two numbers a and b... compare these
811 in parallel. Instead of converting each, and then comparing the
812 outcome. Most likely stopping the comparison before the conversion
813 is complete. The algorithm used, in the old sort:
815 Algorithm: fraccompare
816 Action : compare two decimal fractions
817 accepts : char *a, char *b
818 returns : -1 if a<b, 0 if a=b, 1 if a>b.
819 implement:
821 if *a == decimal_point AND *b == decimal_point
822 find first character different in a and b.
823 if both are digits, return the difference *a - *b.
824 if *a is a digit
825 skip past zeros
826 if digit return 1, else 0
827 if *b is a digit
828 skip past zeros
829 if digit return -1, else 0
830 if *a is a decimal_point
831 skip past decimal_point and zeros
832 if digit return 1, else 0
833 if *b is a decimal_point
834 skip past decimal_point and zeros
835 if digit return -1, else 0
836 return 0 */
838 static int
839 fraccompare (register const char *a, register const char *b)
841 if (*a == decimal_point && *b == decimal_point)
843 while (*++a == *++b)
844 if (! ISDIGIT (*a))
845 return 0;
846 if (ISDIGIT (*a) && ISDIGIT (*b))
847 return *a - *b;
848 if (ISDIGIT (*a))
849 goto a_trailing_nonzero;
850 if (ISDIGIT (*b))
851 goto b_trailing_nonzero;
852 return 0;
854 else if (*a++ == decimal_point)
856 a_trailing_nonzero:
857 while (*a == NUMERIC_ZERO)
858 a++;
859 return ISDIGIT (*a);
861 else if (*b++ == decimal_point)
863 b_trailing_nonzero:
864 while (*b == NUMERIC_ZERO)
865 b++;
866 return - ISDIGIT (*b);
868 return 0;
871 /* Compare strings A and B as numbers without explicitly converting them to
872 machine numbers. Comparatively slow for short strings, but asymptotically
873 hideously fast. */
875 static int
876 numcompare (register const char *a, register const char *b)
878 register int tmpa, tmpb, loga, logb, tmp;
880 tmpa = *a;
881 tmpb = *b;
883 while (blanks[UCHAR (tmpa)])
884 tmpa = *++a;
885 while (blanks[UCHAR (tmpb)])
886 tmpb = *++b;
888 if (tmpa == NEGATION_SIGN)
891 tmpa = *++a;
892 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa));
893 if (tmpb != NEGATION_SIGN)
895 if (tmpa == decimal_point)
897 tmpa = *++a;
898 while (tmpa == NUMERIC_ZERO);
899 if (ISDIGIT (tmpa))
900 return -1;
901 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
902 tmpb = *++b;
903 if (tmpb == decimal_point)
905 tmpb = *++b;
906 while (tmpb == NUMERIC_ZERO);
907 if (ISDIGIT (tmpb))
908 return -1;
909 return 0;
912 tmpb = *++b;
913 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
915 while (tmpa == tmpb && ISDIGIT (tmpa))
918 tmpa = *++a;
919 while (IS_THOUSANDS_SEP (tmpa));
921 tmpb = *++b;
922 while (IS_THOUSANDS_SEP (tmpb));
925 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
926 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
927 return -fraccompare (a, b);
929 tmp = tmpb - tmpa;
931 for (loga = 0; ISDIGIT (tmpa); ++loga)
933 tmpa = *++a;
934 while (IS_THOUSANDS_SEP (tmpa));
936 for (logb = 0; ISDIGIT (tmpb); ++logb)
938 tmpb = *++b;
939 while (IS_THOUSANDS_SEP (tmpb));
941 if (logb - loga != 0)
942 return logb - loga;
944 if (!loga)
945 return 0;
947 return tmp;
949 else if (tmpb == NEGATION_SIGN)
952 tmpb = *++b;
953 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
954 if (tmpb == decimal_point)
956 tmpb = *++b;
957 while (tmpb == NUMERIC_ZERO);
958 if (ISDIGIT (tmpb))
959 return 1;
960 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
961 tmpa = *++a;
962 if (tmpa == decimal_point)
964 tmpa = *++a;
965 while (tmpa == NUMERIC_ZERO);
966 if (ISDIGIT (tmpa))
967 return 1;
968 return 0;
970 else
972 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
973 tmpa = *++a;
974 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
975 tmpb = *++b;
977 while (tmpa == tmpb && ISDIGIT (tmpa))
980 tmpa = *++a;
981 while (IS_THOUSANDS_SEP (tmpa));
983 tmpb = *++b;
984 while (IS_THOUSANDS_SEP (tmpb));
987 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
988 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
989 return fraccompare (a, b);
991 tmp = tmpa - tmpb;
993 for (loga = 0; ISDIGIT (tmpa); ++loga)
995 tmpa = *++a;
996 while (IS_THOUSANDS_SEP (tmpa));
998 for (logb = 0; ISDIGIT (tmpb); ++logb)
1000 tmpb = *++b;
1001 while (IS_THOUSANDS_SEP (tmpb));
1003 if (loga - logb != 0)
1004 return loga - logb;
1006 if (!loga)
1007 return 0;
1009 return tmp;
1013 static int
1014 general_numcompare (const char *sa, const char *sb)
1016 /* FIXME: add option to warn about failed conversions. */
1017 /* FIXME: maybe add option to try expensive FP conversion
1018 only if A and B can't be compared more cheaply/accurately. */
1020 char *ea;
1021 char *eb;
1022 double a = strtod (sa, &ea);
1023 double b = strtod (sb, &eb);
1025 /* Put conversion errors at the start of the collating sequence. */
1026 if (sa == ea)
1027 return sb == eb ? 0 : -1;
1028 if (sb == eb)
1029 return 1;
1031 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1032 conversion errors but before numbers; sort them by internal
1033 bit-pattern, for lack of a more portable alternative. */
1034 return (a < b ? -1
1035 : a > b ? 1
1036 : a == b ? 0
1037 : b == b ? -1
1038 : a == a ? 1
1039 : memcmp ((char *) &a, (char *) &b, sizeof a));
1042 /* Return an integer in 1..12 of the month name S with length LEN.
1043 Return 0 if the name in S is not recognized. */
1045 static int
1046 getmonth (const char *s, int len)
1048 char *month;
1049 register int i, lo = 0, hi = MONTHS_PER_YEAR, result;
1051 while (len > 0 && blanks[UCHAR (*s)])
1053 ++s;
1054 --len;
1057 if (len == 0)
1058 return 0;
1060 month = (char *) alloca (len + 1);
1061 for (i = 0; i < len; ++i)
1062 month[i] = fold_toupper[UCHAR (s[i])];
1063 while (blanks[UCHAR (month[i - 1])])
1064 --i;
1065 month[i] = '\0';
1069 int ix = (lo + hi) / 2;
1071 if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
1072 hi = ix;
1073 else
1074 lo = ix;
1076 while (hi - lo > 1);
1078 result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
1079 ? monthtab[lo].val : 0);
1081 return result;
1084 /* Compare two lines A and B trying every key in sequence until there
1085 are no more keys or a difference is found. */
1087 static int
1088 keycompare (const struct line *a, const struct line *b)
1090 register char *texta, *textb, *lima, *limb;
1091 register unsigned char *translate;
1092 register int *ignore;
1093 struct keyfield *key;
1094 int diff = 0, iter = 0, lena, lenb;
1096 for (key = keyhead.next; key; key = key->next, ++iter)
1098 int comparable_lengths = 1;
1100 ignore = key->ignore;
1101 translate = (unsigned char *) key->translate;
1103 /* Find the beginning and limit of each field. */
1104 if (iter || a->keybeg == NULL || b->keybeg == NULL)
1106 if (key->eword >= 0)
1107 lima = limfield (a, key), limb = limfield (b, key);
1108 else
1109 lima = a->text + a->length, limb = b->text + b->length;
1111 if (key->sword >= 0)
1112 texta = begfield (a, key), textb = begfield (b, key);
1113 else
1115 texta = a->text, textb = b->text;
1116 if (key->skipsblanks)
1118 while (texta < lima && blanks[UCHAR (*texta)])
1119 ++texta;
1120 while (textb < limb && blanks[UCHAR (*textb)])
1121 ++textb;
1125 else
1127 /* For the first iteration only, the key positions have
1128 been precomputed for us. */
1129 texta = a->keybeg, lima = a->keylim;
1130 textb = b->keybeg, limb = b->keylim;
1133 /* Find the lengths. */
1134 lena = lima - texta, lenb = limb - textb;
1135 if (lena < 0)
1136 lena = 0;
1137 if (lenb < 0)
1138 lenb = 0;
1140 if (key->skipeblanks)
1142 char *a_end = texta + lena;
1143 char *b_end = textb + lenb;
1144 trim_trailing_blanks (texta, &a_end);
1145 trim_trailing_blanks (textb, &b_end);
1146 lena = a_end - texta;
1147 lenb = b_end - textb;
1150 /* Actually compare the fields. */
1151 if (key->numeric | key->general_numeric)
1153 char savea, saveb;
1155 /* If the fields are adjacent, adjust the end of the earlier
1156 field back by 1 byte, since we temporarily modify the
1157 byte after the field during comparison. This can't
1158 change a numeric comparison, since the byte is a newline.
1159 If the earlier field is empty, adjust its start as well. */
1160 if (lima == textb)
1161 texta -= texta == lima--;
1162 if (limb == texta)
1163 textb -= textb == limb--;
1165 savea = *lima, saveb = *limb;
1166 *lima = *limb = '\0';
1167 diff = ((key->numeric ? numcompare : general_numcompare)
1168 (texta, textb));
1169 *lima = savea, *limb = saveb;
1170 if (diff)
1171 return key->reverse ? -diff : diff;
1172 continue;
1174 else if (key->month)
1176 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1177 if (diff)
1178 return key->reverse ? -diff : diff;
1179 continue;
1181 #ifdef ENABLE_NLS
1183 /* Sorting like this may become slow, so in a simple locale the user
1184 can select a faster sort that is similar to ascii sort */
1185 else if (hard_LC_COLLATE | hard_LC_CTYPE)
1187 if (ignore || translate)
1189 char *copy_a = (char *) alloca (lena + 1);
1190 char *copy_b = (char *) alloca (lenb + 1);
1191 int new_len_a, new_len_b, i;
1193 /* Ignore and/or translate chars before comparing. */
1194 for (new_len_a = new_len_b = i = 0; i < max (lena, lenb); i++)
1196 if (i < lena)
1198 copy_a[new_len_a] = (translate
1199 ? translate[UCHAR (texta[i])]
1200 : texta[i]);
1201 if (!ignore || !ignore[UCHAR (texta[i])])
1202 ++new_len_a;
1204 if (i < lenb)
1206 copy_b[new_len_b] = (translate
1207 ? translate[UCHAR (textb[i])]
1208 : textb [i]);
1209 if (!ignore || !ignore[UCHAR (textb[i])])
1210 ++new_len_b;
1214 diff = memcoll (copy_a, new_len_a, copy_b, new_len_b);
1216 else
1218 diff = memcoll (texta, lena, textb, lenb);
1221 if (diff)
1222 return key->reverse ? -diff : diff;
1224 continue;
1226 #endif
1227 else if (ignore && translate)
1229 #define CMP_WITH_IGNORE(A, B) \
1230 do \
1232 while (texta < lima && textb < limb) \
1234 while (texta < lima && ignore[UCHAR (*texta)]) \
1235 ++texta; \
1236 while (textb < limb && ignore[UCHAR (*textb)]) \
1237 ++textb; \
1238 if (texta < lima && textb < limb) \
1240 if ((A) != (B)) \
1242 diff = UCHAR (A) - UCHAR (B); \
1243 break; \
1245 ++texta; \
1246 ++textb; \
1249 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1250 diff = -1; \
1251 else if (texta < lima && textb == limb \
1252 && !ignore[UCHAR (*texta)]) \
1253 diff = 1; \
1256 if (diff == 0) \
1258 while (texta < lima && ignore[UCHAR (*texta)]) \
1259 ++texta; \
1260 while (textb < limb && ignore[UCHAR (*textb)]) \
1261 ++textb; \
1263 if (texta == lima && textb < limb) \
1264 diff = -1; \
1265 else if (texta < lima && textb == limb) \
1266 diff = 1; \
1268 /* Relative lengths are meaningless if characters were ignored. \
1269 Handling this case here avoids what might be an invalid length \
1270 comparison below. */ \
1271 if (diff == 0 && texta == lima && textb == limb) \
1272 comparable_lengths = 0; \
1274 while (0)
1276 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1277 else if (ignore)
1278 CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1279 else if (translate)
1280 while (texta < lima && textb < limb)
1282 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1284 diff = (UCHAR (translate[UCHAR (*--texta)])
1285 - UCHAR (translate[UCHAR (*--textb)]));
1286 break;
1289 else
1291 #ifdef ENABLE_NLS
1292 if (hard_LC_COLLATE)
1294 /* Ignore any length difference if the localized comparison
1295 says the strings are equal. */
1296 comparable_lengths = 0;
1297 diff = memcoll (texta, lena, textb, lenb);
1299 else
1300 #endif
1302 diff = memcmp (texta, textb, min (lena, lenb));
1306 if (diff)
1307 return key->reverse ? -diff : diff;
1308 if (comparable_lengths && (diff = lena - lenb) != 0)
1309 return key->reverse ? -diff : diff;
1312 return 0;
1315 /* Compare two lines A and B, returning negative, zero, or positive
1316 depending on whether A compares less than, equal to, or greater than B. */
1318 static int
1319 compare (register const struct line *a, register const struct line *b)
1321 int diff, tmpa, tmpb;
1323 /* First try to compare on the specified keys (if any).
1324 The only two cases with no key at all are unadorned sort,
1325 and unadorned sort -r. */
1326 if (keyhead.next)
1328 diff = keycompare (a, b);
1329 if (diff != 0 || unique || stable)
1331 alloca (0);
1332 return diff;
1336 /* If the keys all compare equal (or no keys were specified)
1337 fall through to the default byte-by-byte comparison. */
1338 tmpa = a->length, tmpb = b->length;
1340 #ifdef ENABLE_NLS
1341 if (hard_LC_COLLATE)
1343 diff = memcoll (a->text, tmpa, b->text, tmpb);
1344 alloca (0);
1345 return reverse ? -diff : diff;
1347 #endif
1349 diff = UCHAR (a->text[0]) - UCHAR (b->text[0]);
1350 if (diff == 0)
1352 diff = memcmp (a->text, b->text, min (tmpa, tmpb));
1353 if (diff == 0)
1354 diff = tmpa - tmpb;
1357 return reverse ? -diff : diff;
1360 /* Check that the lines read from the given FP come in order. Print a
1361 diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1362 the line number of the first out-of-order line (counting from 1) if they
1363 are not in order. Otherwise, print no diagnostic and return zero. */
1365 static int
1366 checkfp (FILE *fp, const char *file_name)
1368 struct buffer buf; /* Input buffer. */
1369 struct lines lines; /* Lines scanned from the buffer. */
1370 struct line temp; /* Copy of previous line. */
1371 int cc; /* Character count. */
1372 int alloc;
1373 int line_number = 1;
1374 struct line *disorder_line;
1375 int disorder_line_number = 0;
1377 #ifdef lint /* Suppress `used before initialized' warning. */
1378 disorder_line = NULL;
1379 #endif
1381 initbuf (&buf, mergealloc);
1382 initlines (&lines, mergealloc / linelength + 1,
1383 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1384 alloc = linelength;
1385 temp.text = xmalloc (alloc);
1387 cc = fillbuf (&buf, fp);
1388 if (cc == 0)
1389 goto finish;
1391 findlines (&buf, &lines);
1393 while (1)
1395 struct line *prev_line; /* Pointer to previous line. */
1396 int cmp; /* Result of calling compare. */
1397 int i;
1399 /* Compare each line in the buffer with its successor. */
1400 for (i = 0; i < lines.used - 1; ++i)
1402 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1403 if ((unique && cmp >= 0) || (cmp > 0))
1405 disorder_line = &lines.lines[i + 1];
1406 disorder_line_number = line_number + i + 1;
1407 goto finish;
1411 line_number += lines.used;
1413 /* Save the last line of the buffer and refill the buffer. */
1414 prev_line = lines.lines + (lines.used - 1);
1415 if (prev_line->length + 1 > alloc)
1419 alloc *= 2;
1421 while (alloc < prev_line->length + 1);
1422 temp.text = xrealloc (temp.text, alloc);
1424 assert (prev_line->length + 1 <= alloc);
1425 memcpy (temp.text, prev_line->text, prev_line->length);
1426 temp.length = prev_line->length;
1427 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1428 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1430 cc = fillbuf (&buf, fp);
1431 if (cc == 0)
1432 break;
1434 findlines (&buf, &lines);
1435 /* Make sure the line saved from the old buffer contents is
1436 less than or equal to the first line of the new buffer. */
1437 cmp = compare (&temp, &lines.lines[0]);
1438 if ((unique && cmp >= 0) || (cmp > 0))
1440 disorder_line = &lines.lines[0];
1441 disorder_line_number = line_number;
1442 break;
1446 finish:
1447 xfclose (fp);
1449 if (disorder_line_number)
1451 fprintf (stderr, _("%s: %s:%d: disorder: "), program_name, file_name,
1452 disorder_line_number);
1453 write_bytes (disorder_line->text, disorder_line->length, stderr);
1456 free (buf.buf);
1457 free ((char *) lines.lines);
1458 free (temp.text);
1459 return disorder_line_number;
1462 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1463 Close FPS before returning. */
1465 static void
1466 mergefps (FILE **fps, register int nfps, FILE *ofp)
1468 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1469 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1470 struct line saved; /* Saved line for unique check. */
1471 int savedflag = 0; /* True if there is a saved line. */
1472 int savealloc; /* Size allocated for the saved line. */
1473 int cur[NMERGE]; /* Current line in each line table. */
1474 int ord[NMERGE]; /* Table representing a permutation of fps,
1475 such that lines[ord[0]].lines[cur[ord[0]]]
1476 is the smallest line and will be next
1477 output. */
1478 register int i, j, t;
1480 #ifdef lint /* Suppress `used before initialized' warning. */
1481 savealloc = 0;
1482 #endif
1484 /* Allocate space for a saved line if necessary. */
1485 if (unique)
1487 savealloc = linelength;
1488 saved.text = xmalloc (savealloc);
1491 /* Read initial lines from each input file. */
1492 for (i = 0; i < nfps; ++i)
1494 initbuf (&buffer[i], mergealloc);
1495 /* If a file is empty, eliminate it from future consideration. */
1496 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1498 xfclose (fps[i]);
1499 --nfps;
1500 for (j = i; j < nfps; ++j)
1501 fps[j] = fps[j + 1];
1503 if (i == nfps)
1504 free (buffer[i].buf);
1505 else
1507 initlines (&lines[i], mergealloc / linelength + 1,
1508 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1509 findlines (&buffer[i], &lines[i]);
1510 cur[i] = 0;
1514 /* Set up the ord table according to comparisons among input lines.
1515 Since this only reorders two items if one is strictly greater than
1516 the other, it is stable. */
1517 for (i = 0; i < nfps; ++i)
1518 ord[i] = i;
1519 for (i = 1; i < nfps; ++i)
1520 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1521 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1522 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1524 /* Repeatedly output the smallest line until no input remains. */
1525 while (nfps)
1527 /* If uniquified output is turned on, output only the first of
1528 an identical series of lines. */
1529 if (unique)
1531 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1533 write_bytes (saved.text, saved.length, ofp);
1534 savedflag = 0;
1536 if (!savedflag)
1538 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1540 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1541 savealloc *= 2;
1542 saved.text = xrealloc (saved.text, savealloc);
1544 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1545 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1546 saved.length);
1547 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1549 saved.keybeg = saved.text +
1550 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1551 - lines[ord[0]].lines[cur[ord[0]]].text);
1553 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1555 saved.keylim = saved.text +
1556 (lines[ord[0]].lines[cur[ord[0]]].keylim
1557 - lines[ord[0]].lines[cur[ord[0]]].text);
1559 savedflag = 1;
1562 else
1563 write_bytes (lines[ord[0]].lines[cur[ord[0]]].text,
1564 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1566 /* Check if we need to read more lines into core. */
1567 if (++cur[ord[0]] == lines[ord[0]].used)
1569 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1571 findlines (&buffer[ord[0]], &lines[ord[0]]);
1572 cur[ord[0]] = 0;
1574 else
1576 /* We reached EOF on fps[ord[0]]. */
1577 for (i = 1; i < nfps; ++i)
1578 if (ord[i] > ord[0])
1579 --ord[i];
1580 --nfps;
1581 xfclose (fps[ord[0]]);
1582 free (buffer[ord[0]].buf);
1583 free ((char *) lines[ord[0]].lines);
1584 for (i = ord[0]; i < nfps; ++i)
1586 fps[i] = fps[i + 1];
1587 buffer[i] = buffer[i + 1];
1588 lines[i] = lines[i + 1];
1589 cur[i] = cur[i + 1];
1591 for (i = 0; i < nfps; ++i)
1592 ord[i] = ord[i + 1];
1593 continue;
1597 /* The new line just read in may be larger than other lines
1598 already in core; push it back in the queue until we encounter
1599 a line larger than it. */
1600 for (i = 1; i < nfps; ++i)
1602 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1603 &lines[ord[i]].lines[cur[ord[i]]]);
1604 if (!t)
1605 t = ord[0] - ord[i];
1606 if (t < 0)
1607 break;
1609 t = ord[0];
1610 for (j = 1; j < i; ++j)
1611 ord[j - 1] = ord[j];
1612 ord[i - 1] = t;
1615 if (unique && savedflag)
1617 write_bytes (saved.text, saved.length, ofp);
1618 free (saved.text);
1622 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1624 static void
1625 sortlines (struct line *lines, int nlines, struct line *temp)
1627 register struct line *lo, *hi, *t;
1628 register int nlo, nhi;
1630 if (nlines == 2)
1632 if (compare (&lines[0], &lines[1]) > 0)
1634 *temp = lines[0];
1635 lines[0] = lines[1];
1636 lines[1] = *temp;
1638 return;
1641 nlo = nlines / 2;
1642 lo = lines;
1643 nhi = nlines - nlo;
1644 hi = lines + nlo;
1646 if (nlo > 1)
1647 sortlines (lo, nlo, temp);
1649 if (nhi > 1)
1650 sortlines (hi, nhi, temp);
1652 t = temp;
1654 while (nlo && nhi)
1655 if (compare (lo, hi) <= 0)
1656 *t++ = *lo++, --nlo;
1657 else
1658 *t++ = *hi++, --nhi;
1659 while (nlo--)
1660 *t++ = *lo++;
1662 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1663 *lo++ = *t++;
1666 /* Check that each of the NFILES FILES is ordered.
1667 Return a count of disordered files. */
1669 static int
1670 check (char **files, int nfiles)
1672 int i, disorders = 0;
1673 FILE *fp;
1675 for (i = 0; i < nfiles; ++i)
1677 fp = xfopen (files[i], "r");
1678 if (checkfp (fp, files[i]))
1680 ++disorders;
1683 return disorders;
1686 /* Merge NFILES FILES onto OFP. */
1688 static void
1689 merge (char **files, int nfiles, FILE *ofp)
1691 int i, j, t;
1692 char *temp;
1693 FILE *fps[NMERGE], *tfp;
1695 while (nfiles > NMERGE)
1697 t = 0;
1698 for (i = 0; i < nfiles / NMERGE; ++i)
1700 for (j = 0; j < NMERGE; ++j)
1701 fps[j] = xfopen (files[i * NMERGE + j], "r");
1702 tfp = xtmpfopen (temp = tempname ());
1703 mergefps (fps, NMERGE, tfp);
1704 xfclose (tfp);
1705 for (j = 0; j < NMERGE; ++j)
1706 zaptemp (files[i * NMERGE + j]);
1707 files[t++] = temp;
1709 for (j = 0; j < nfiles % NMERGE; ++j)
1710 fps[j] = xfopen (files[i * NMERGE + j], "r");
1711 tfp = xtmpfopen (temp = tempname ());
1712 mergefps (fps, nfiles % NMERGE, tfp);
1713 xfclose (tfp);
1714 for (j = 0; j < nfiles % NMERGE; ++j)
1715 zaptemp (files[i * NMERGE + j]);
1716 files[t++] = temp;
1717 nfiles = t;
1720 for (i = 0; i < nfiles; ++i)
1721 fps[i] = xfopen (files[i], "r");
1722 mergefps (fps, i, ofp);
1723 for (i = 0; i < nfiles; ++i)
1724 zaptemp (files[i]);
1727 /* Sort NFILES FILES onto OFP. */
1729 static void
1730 sort (char **files, int nfiles, FILE *ofp)
1732 struct buffer buf;
1733 struct lines lines;
1734 struct line *tmp;
1735 int i, ntmp;
1736 FILE *fp, *tfp;
1737 struct tempnode *node;
1738 int n_temp_files = 0;
1739 char **tempfiles;
1741 initbuf (&buf, sortalloc);
1742 initlines (&lines, sortalloc / linelength + 1,
1743 LINEALLOC / sizeof (struct line));
1744 ntmp = lines.alloc;
1745 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1747 while (nfiles--)
1749 fp = xfopen (*files++, "r");
1750 while (fillbuf (&buf, fp))
1752 findlines (&buf, &lines);
1753 if (lines.used > ntmp)
1755 while (lines.used > ntmp)
1756 ntmp *= 2;
1757 tmp = (struct line *)
1758 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1760 sortlines (lines.lines, lines.used, tmp);
1761 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1763 tfp = ofp;
1765 else
1767 ++n_temp_files;
1768 tfp = xtmpfopen (tempname ());
1770 for (i = 0; i < lines.used; ++i)
1771 if (!unique || i == 0
1772 || compare (&lines.lines[i], &lines.lines[i - 1]))
1773 write_bytes (lines.lines[i].text, lines.lines[i].length, tfp);
1774 if (tfp != ofp)
1775 xfclose (tfp);
1777 xfclose (fp);
1780 free (buf.buf);
1781 free ((char *) lines.lines);
1782 free ((char *) tmp);
1784 if (n_temp_files)
1786 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1787 i = n_temp_files;
1788 for (node = temphead.next; i > 0; node = node->next)
1789 tempfiles[--i] = node->name;
1790 merge (tempfiles, n_temp_files, ofp);
1791 free ((char *) tempfiles);
1795 /* Insert key KEY at the end of the list (`keyhead'). */
1797 static void
1798 insertkey (struct keyfield *key)
1800 struct keyfield *k = &keyhead;
1802 while (k->next)
1803 k = k->next;
1804 k->next = key;
1805 key->next = NULL;
1808 static void
1809 badfieldspec (const char *s)
1811 error (SORT_FAILURE, 0, _("invalid field specification `%s'"), s);
1814 /* Handle interrupts and hangups. */
1816 static void
1817 sighandler (int sig)
1819 #ifdef SA_INTERRUPT
1820 struct sigaction sigact;
1822 sigact.sa_handler = SIG_DFL;
1823 sigemptyset (&sigact.sa_mask);
1824 sigact.sa_flags = 0;
1825 sigaction (sig, &sigact, NULL);
1826 #else /* !SA_INTERRUPT */
1827 signal (sig, SIG_DFL);
1828 #endif /* SA_INTERRUPT */
1829 cleanup ();
1830 kill (getpid (), sig);
1833 /* Set the ordering options for KEY specified in S.
1834 Return the address of the first character in S that
1835 is not a valid ordering option.
1836 BLANKTYPE is the kind of blanks that 'b' should skip. */
1838 static char *
1839 set_ordering (register const char *s, struct keyfield *key,
1840 enum blanktype blanktype)
1842 while (*s)
1844 switch (*s)
1846 case 'b':
1847 if (blanktype == bl_start || blanktype == bl_both)
1848 key->skipsblanks = 1;
1849 if (blanktype == bl_end || blanktype == bl_both)
1850 key->skipeblanks = 1;
1851 break;
1852 case 'd':
1853 key->ignore = nondictionary;
1854 break;
1855 case 'f':
1856 key->translate = fold_toupper;
1857 break;
1858 case 'g':
1859 key->general_numeric = 1;
1860 break;
1861 case 'i':
1862 key->ignore = nonprinting;
1863 break;
1864 case 'M':
1865 key->month = 1;
1866 break;
1867 case 'n':
1868 key->numeric = 1;
1869 break;
1870 case 'r':
1871 key->reverse = 1;
1872 break;
1873 default:
1874 return (char *) s;
1876 ++s;
1878 return (char *) s;
1881 static void
1882 key_init (struct keyfield *key)
1884 memset (key, 0, sizeof (*key));
1885 key->eword = -1;
1889 main (int argc, char **argv)
1891 struct keyfield *key = NULL, gkey;
1892 char *s;
1893 int i, t, t2;
1894 int checkonly = 0, mergeonly = 0, nfiles = 0;
1895 char *minus = "-", *outfile = minus, **files, *tmp;
1896 FILE *ofp;
1897 #ifdef SA_INTERRUPT
1898 struct sigaction oldact, newact;
1899 #endif /* SA_INTERRUPT */
1901 program_name = argv[0];
1902 setlocale (LC_ALL, "");
1903 bindtextdomain (PACKAGE, LOCALEDIR);
1904 textdomain (PACKAGE);
1906 #ifdef ENABLE_NLS
1908 hard_LC_COLLATE = hard_locale (LC_COLLATE);
1909 hard_LC_CTYPE = hard_locale (LC_CTYPE);
1910 # if HAVE_NL_LANGINFO
1911 hard_LC_TIME = hard_locale (LC_TIME);
1912 # endif
1914 /* Let's get locale's representation of the decimal point */
1916 struct lconv *lconvp = localeconv ();
1918 /* If the locale doesn't define a decimal point, or if the decimal
1919 point is multibyte, use the C decimal point. We don't support
1920 multibyte decimal points yet. */
1921 decimal_point = *lconvp->decimal_point;
1922 if (! decimal_point || lconvp->decimal_point[1])
1923 decimal_point = C_DECIMAL_POINT;
1925 /* We don't support multibyte thousands separators yet. */
1926 th_sep = *lconvp->thousands_sep;
1927 if (! th_sep || lconvp->thousands_sep[1])
1928 th_sep = CHAR_MAX + 1;
1931 #endif /* NLS */
1933 parse_long_options (argc, argv, PROGRAM_NAME, GNU_PACKAGE, VERSION,
1934 AUTHORS, usage);
1936 have_read_stdin = 0;
1937 inittables ();
1939 temp_dir = getenv ("TMPDIR");
1940 if (temp_dir == NULL)
1941 temp_dir = DEFAULT_TMPDIR;
1943 /* Change the way xmalloc and xrealloc fail. */
1944 xalloc_exit_failure = SORT_FAILURE;
1945 xalloc_fail_func = cleanup;
1947 #ifdef SA_INTERRUPT
1948 newact.sa_handler = sighandler;
1949 sigemptyset (&newact.sa_mask);
1950 newact.sa_flags = 0;
1952 sigaction (SIGINT, NULL, &oldact);
1953 if (oldact.sa_handler != SIG_IGN)
1954 sigaction (SIGINT, &newact, NULL);
1955 sigaction (SIGHUP, NULL, &oldact);
1956 if (oldact.sa_handler != SIG_IGN)
1957 sigaction (SIGHUP, &newact, NULL);
1958 sigaction (SIGPIPE, NULL, &oldact);
1959 if (oldact.sa_handler != SIG_IGN)
1960 sigaction (SIGPIPE, &newact, NULL);
1961 sigaction (SIGTERM, NULL, &oldact);
1962 if (oldact.sa_handler != SIG_IGN)
1963 sigaction (SIGTERM, &newact, NULL);
1964 #else /* !SA_INTERRUPT */
1965 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1966 signal (SIGINT, sighandler);
1967 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1968 signal (SIGHUP, sighandler);
1969 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1970 signal (SIGPIPE, sighandler);
1971 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1972 signal (SIGTERM, sighandler);
1973 #endif /* !SA_INTERRUPT */
1975 gkey.sword = gkey.eword = -1;
1976 gkey.ignore = NULL;
1977 gkey.translate = NULL;
1978 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1979 gkey.skipsblanks = gkey.skipeblanks = 0;
1981 files = (char **) xmalloc (sizeof (char *) * argc);
1983 for (i = 1; i < argc; ++i)
1985 if (argv[i][0] == '+')
1987 if (key)
1988 insertkey (key);
1989 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1990 key_init (key);
1991 s = argv[i] + 1;
1992 if (! (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1]))))
1993 badfieldspec (argv[i]);
1994 for (t = 0; ISDIGIT (*s); ++s)
1995 t = 10 * t + *s - '0';
1996 t2 = 0;
1997 if (*s == '.')
1998 for (++s; ISDIGIT (*s); ++s)
1999 t2 = 10 * t2 + *s - '0';
2000 if (t2 || t)
2002 key->sword = t;
2003 key->schar = t2;
2005 else
2006 key->sword = -1;
2007 s = set_ordering (s, key, bl_start);
2008 if (*s)
2009 badfieldspec (argv[i]);
2011 else if (argv[i][0] == '-' && argv[i][1])
2013 s = argv[i] + 1;
2014 if (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1])))
2016 if (!key)
2018 /* Provoke with `sort -9'. */
2019 error (0, 0, _("when using the old-style +POS and -POS \
2020 key specifiers,\nthe +POS specifier must come first"));
2021 usage (SORT_FAILURE);
2023 for (t = 0; ISDIGIT (*s); ++s)
2024 t = t * 10 + *s - '0';
2025 t2 = 0;
2026 if (*s == '.')
2027 for (++s; ISDIGIT (*s); ++s)
2028 t2 = t2 * 10 + *s - '0';
2029 key->eword = t;
2030 key->echar = t2;
2031 s = set_ordering (s, key, bl_end);
2032 if (*s)
2033 badfieldspec (argv[i]);
2034 insertkey (key);
2035 key = NULL;
2037 else
2038 while (*s)
2040 s = set_ordering (s, &gkey, bl_both);
2041 switch (*s)
2043 case '\0':
2044 break;
2045 case 'c':
2046 checkonly = 1;
2047 break;
2048 case 'k':
2049 if (s[1])
2050 ++s;
2051 else
2053 if (i == argc - 1)
2054 error (SORT_FAILURE, 0,
2055 _("option `-k' requires an argument"));
2056 else
2057 s = argv[++i];
2059 if (key)
2060 insertkey (key);
2061 key = (struct keyfield *)
2062 xmalloc (sizeof (struct keyfield));
2063 key_init (key);
2064 /* Get POS1. */
2065 if (!ISDIGIT (*s))
2066 badfieldspec (argv[i]);
2067 for (t = 0; ISDIGIT (*s); ++s)
2068 t = 10 * t + *s - '0';
2069 if (t == 0)
2071 /* Provoke with `sort -k0' */
2072 error (0, 0, _("the starting field number argument \
2073 to the `-k' option must be positive"));
2074 badfieldspec (argv[i]);
2076 --t;
2077 t2 = 0;
2078 if (*s == '.')
2080 if (!ISDIGIT (s[1]))
2082 /* Provoke with `sort -k1.' */
2083 error (0, 0, _("starting field spec has `.' but \
2084 lacks following character offset"));
2085 badfieldspec (argv[i]);
2087 for (++s; ISDIGIT (*s); ++s)
2088 t2 = 10 * t2 + *s - '0';
2089 if (t2 == 0)
2091 /* Provoke with `sort -k1.0' */
2092 error (0, 0, _("starting field character offset \
2093 argument to the `-k' option\nmust be positive"));
2094 badfieldspec (argv[i]);
2096 --t2;
2098 if (t2 || t)
2100 key->sword = t;
2101 key->schar = t2;
2103 else
2104 key->sword = -1;
2105 s = set_ordering (s, key, bl_start);
2106 if (*s == 0)
2108 key->eword = -1;
2109 key->echar = 0;
2111 else if (*s != ',')
2112 badfieldspec (argv[i]);
2113 else if (*s == ',')
2115 /* Skip over comma. */
2116 ++s;
2117 if (*s == 0)
2119 /* Provoke with `sort -k1,' */
2120 error (0, 0, _("field specification has `,' but \
2121 lacks following field spec"));
2122 badfieldspec (argv[i]);
2124 /* Get POS2. */
2125 for (t = 0; ISDIGIT (*s); ++s)
2126 t = t * 10 + *s - '0';
2127 if (t == 0)
2129 /* Provoke with `sort -k1,0' */
2130 error (0, 0, _("ending field number argument \
2131 to the `-k' option must be positive"));
2132 badfieldspec (argv[i]);
2134 --t;
2135 t2 = 0;
2136 if (*s == '.')
2138 if (!ISDIGIT (s[1]))
2140 /* Provoke with `sort -k1,1.' */
2141 error (0, 0, _("ending field spec has `.' \
2142 but lacks following character offset"));
2143 badfieldspec (argv[i]);
2145 for (++s; ISDIGIT (*s); ++s)
2146 t2 = t2 * 10 + *s - '0';
2148 else
2150 /* `-k 2,3' is equivalent to `+1 -3'. */
2151 ++t;
2153 key->eword = t;
2154 key->echar = t2;
2155 s = set_ordering (s, key, bl_end);
2156 if (*s)
2157 badfieldspec (argv[i]);
2159 insertkey (key);
2160 key = NULL;
2161 goto outer;
2162 case 'm':
2163 mergeonly = 1;
2164 break;
2165 case 'o':
2166 if (s[1])
2167 outfile = s + 1;
2168 else
2170 if (i == argc - 1)
2171 error (SORT_FAILURE, 0,
2172 _("option `-o' requires an argument"));
2173 else
2174 outfile = argv[++i];
2176 goto outer;
2177 case 's':
2178 stable = 1;
2179 break;
2180 case 't':
2181 if (s[1])
2182 tab = *++s;
2183 else if (i < argc - 1)
2185 tab = *argv[++i];
2186 goto outer;
2188 else
2189 error (SORT_FAILURE, 0,
2190 _("option `-t' requires an argument"));
2191 break;
2192 case 'T':
2193 if (s[1])
2194 temp_dir = ++s;
2195 else
2197 if (i < argc - 1)
2198 temp_dir = argv[++i];
2199 else
2200 error (SORT_FAILURE, 0,
2201 _("option `-T' requires an argument"));
2203 goto outer;
2204 /* break; */
2205 case 'u':
2206 unique = 1;
2207 break;
2208 case 'z':
2209 eolchar = 0;
2210 break;
2211 case 'y':
2212 /* Accept and ignore e.g. -y0 for compatibility with
2213 Solaris 2. */
2214 goto outer;
2215 default:
2216 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2217 argv[0], *s);
2218 usage (SORT_FAILURE);
2220 if (*s)
2221 ++s;
2224 else /* Not an option. */
2226 files[nfiles++] = argv[i];
2228 outer:;
2231 if (key)
2232 insertkey (key);
2234 /* Inheritance of global options to individual keys. */
2235 for (key = keyhead.next; key; key = key->next)
2236 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2237 && !key->skipeblanks && !key->month && !key->numeric
2238 && !key->general_numeric)
2240 key->ignore = gkey.ignore;
2241 key->translate = gkey.translate;
2242 key->skipsblanks = gkey.skipsblanks;
2243 key->skipeblanks = gkey.skipeblanks;
2244 key->month = gkey.month;
2245 key->numeric = gkey.numeric;
2246 key->general_numeric = gkey.general_numeric;
2247 key->reverse = gkey.reverse;
2250 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2251 || gkey.skipeblanks || gkey.month || gkey.numeric
2252 || gkey.general_numeric))
2253 insertkey (&gkey);
2254 reverse = gkey.reverse;
2256 if (nfiles == 0)
2258 nfiles = 1;
2259 files = &minus;
2262 if (checkonly)
2264 /* POSIX requires that sort return 1 IFF invoked with -c and the
2265 input is not properly sorted. */
2266 exit (check (files, nfiles) == 0 ? 0 : 1);
2269 if (!STREQ (outfile, "-"))
2271 struct stat outstat;
2272 if (stat (outfile, &outstat) == 0)
2274 /* The following code prevents a race condition when
2275 people use the brain dead shell programming idiom:
2276 cat file | sort -o file
2277 This feature is provided for historical compatibility,
2278 but we strongly discourage ever relying on this in
2279 new shell programs. */
2281 /* Temporarily copy each input file that might be another name
2282 for the output file. When in doubt (e.g. a pipe), copy. */
2283 for (i = 0; i < nfiles; ++i)
2285 char buf[8192];
2286 FILE *in_fp;
2287 FILE *out_fp;
2288 int cc;
2290 if (S_ISREG (outstat.st_mode) && !STREQ (outfile, files[i]))
2292 struct stat instat;
2293 if ((STREQ (files[i], "-")
2294 ? fstat (STDIN_FILENO, &instat)
2295 : stat (files[i], &instat)) != 0)
2297 error (0, errno, "%s", files[i]);
2298 cleanup ();
2299 exit (SORT_FAILURE);
2301 if (S_ISREG (instat.st_mode) && !SAME_INODE (instat, outstat))
2303 /* We know the files are distinct. */
2304 continue;
2308 in_fp = xfopen (files[i], "r");
2309 tmp = tempname ();
2310 out_fp = xtmpfopen (tmp);
2311 /* FIXME: maybe use copy.c(copy) here. */
2312 while ((cc = fread (buf, 1, sizeof buf, in_fp)) > 0)
2313 write_bytes (buf, cc, out_fp);
2314 if (ferror (in_fp))
2316 error (0, errno, "%s", files[i]);
2317 cleanup ();
2318 exit (SORT_FAILURE);
2320 xfclose (out_fp);
2321 xfclose (in_fp);
2322 files[i] = tmp;
2324 ofp = xfopen (outfile, "w");
2326 else
2328 /* A non-`-' outfile was specified, but the file doesn't yet exist.
2329 Before opening it for writing (thus creating it), make sure all
2330 of the input files exist. Otherwise, creating the output file
2331 could create an otherwise missing input file, making sort succeed
2332 when it should fail. */
2333 for (i = 0; i < nfiles; ++i)
2335 struct stat sb;
2336 if (STREQ (files[i], "-"))
2337 continue;
2338 if (stat (files[i], &sb))
2340 error (0, errno, "%s", files[i]);
2341 cleanup ();
2342 exit (SORT_FAILURE);
2346 ofp = xfopen (outfile, "w");
2349 else
2351 ofp = stdout;
2354 if (mergeonly)
2355 merge (files, nfiles, ofp);
2356 else
2357 sort (files, nfiles, ofp);
2358 cleanup ();
2360 /* If we wait for the implicit flush on exit, and the parent process
2361 has closed stdout (e.g., exec >&- in a shell), then the output file
2362 winds up empty. I don't understand why. This is under SunOS,
2363 Solaris, Ultrix, and Irix. This premature fflush makes the output
2364 reappear. --karl@cs.umb.edu */
2365 if (fflush (ofp) < 0)
2366 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2368 if (have_read_stdin && fclose (stdin) == EOF)
2369 error (SORT_FAILURE, errno, "%s", outfile);
2370 if (ferror (stdout) || fclose (stdout) == EOF)
2371 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2373 exit (EXIT_SUCCESS);