*** empty log message ***
[coreutils.git] / src / sort.c
blob3446abdc9d848c91fb48ae6129d5442170427ded
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 91-97, 98 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
24 #include <config.h>
26 #include <sys/types.h>
27 #include <signal.h>
28 #include <stdio.h>
29 #include <assert.h>
30 #include "system.h"
31 #include "long-options.h"
32 #include "error.h"
33 #include "xstrtod.h"
34 #include "xalloc.h"
36 #if defined ENABLE_NLS && HAVE_LANGINFO_H
37 # include <langinfo.h>
38 #endif
40 char *xstrdup ();
42 /* Undefine, to avoid warning about redefinition on some systems. */
43 #undef min
44 #define min(a, b) ((a) < (b) ? (a) : (b))
45 #undef max
46 #define max(a, b) ((a) > (b) ? (a) : (b))
48 #define UCHAR_LIM (UCHAR_MAX + 1)
49 #define UCHAR(c) ((unsigned char) (c))
51 #ifndef DEFAULT_TMPDIR
52 # define DEFAULT_TMPDIR "/tmp"
53 #endif
55 /* Use this as exit status in case of error, not EXIT_FAILURE. This
56 is necessary because EXIT_FAILURE is usually 1 and POSIX requires
57 that sort exit with status 1 IFF invoked with -c and the input is
58 not properly sorted. Any other irregular exit must exit with a
59 status code greater than 1. */
60 #define SORT_FAILURE 2
62 #define FLOATING_POINT '.'
63 #define FLOATING_COMMA ','
64 #define NEGATION_SIGN '-'
65 #define NUMERIC_ZERO '0'
67 #ifdef ENABLE_NLS
68 # define NLS_MEMCMP(S1, S2, Len) strncoll (S1, S2, Len)
69 # define NLS_STRNCMP(S1, S2, Len) strncoll_s2_readonly (S1, S2, Len)
70 #else
71 # define NLS_MEMCMP(S1, S2, Len) memcmp (S1, S2, Len)
72 # define NLS_STRNCMP(S1, S2, Len) strncmp (S1, S2, Len)
73 #endif
75 #ifdef ENABLE_NLS
77 static unsigned char decimal_point;
78 static unsigned char th_sep;
79 static char *nls_grouping;
81 /* This is "C" locale, need another? */
82 static int need_locale = 0;
84 /* Should we look for decimal point? */
85 static int nls_fraction_found = 1;
87 /* Look for month notations in text? */
88 static int nls_month_found = 1;
90 #else
92 # define decimal_point FLOATING_POINT
94 #endif
96 /* If native language support is requested, make a 1-1 map to the
97 locale character map, otherwise ensure normal behavior. */
98 #ifdef ENABLE_NLS
100 /* 12 months in a year */
101 # define NLS_NUM_MONTHS 12
103 /* Maximum number of elements, to allocate per allocation unit */
104 # define NLS_MAX_GROUPS 8
106 /* A string with one character, to enforce char collation */
107 # define NLS_ONE_CHARACTER_STRING " "
109 #endif
111 /* The kind of blanks for '-b' to skip in various options. */
112 enum blanktype { bl_start, bl_end, bl_both };
114 /* The character marking end of line. Default to \n. */
115 int eolchar = '\n';
117 /* Lines are held in core as counted strings. */
118 struct line
120 char *text; /* Text of the line. */
121 int length; /* Length not including final newline. */
122 char *keybeg; /* Start of first key. */
123 char *keylim; /* Limit of first key. */
126 /* Arrays of lines. */
127 struct lines
129 struct line *lines; /* Dynamically allocated array of lines. */
130 int used; /* Number of slots used. */
131 int alloc; /* Number of slots allocated. */
132 int limit; /* Max number of slots to allocate. */
135 /* Input buffers. */
136 struct buffer
138 char *buf; /* Dynamically allocated buffer. */
139 int used; /* Number of bytes used. */
140 int alloc; /* Number of bytes allocated. */
141 int left; /* Number of bytes left after line parsing. */
144 struct keyfield
146 int sword; /* Zero-origin 'word' to start at. */
147 int schar; /* Additional characters to skip. */
148 int skipsblanks; /* Skip leading white space at start. */
149 int eword; /* Zero-origin first word after field. */
150 int echar; /* Additional characters in field. */
151 int skipeblanks; /* Skip trailing white space at finish. */
152 int *ignore; /* Boolean array of characters to ignore. */
153 char *translate; /* Translation applied to characters. */
154 int numeric; /* Flag for numeric comparison. Handle
155 strings of digits with optional decimal
156 point, but no exponential notation. */
157 int general_numeric; /* Flag for general, numeric comparison.
158 Handle numbers in exponential notation. */
159 int month; /* Flag for comparison by month name. */
160 int reverse; /* Reverse the sense of comparison. */
161 struct keyfield *next; /* Next keyfield to try. */
164 struct month
166 char *name;
167 int val;
170 /* The name this program was run with. */
171 char *program_name;
173 /* Table of white space. */
174 static int blanks[UCHAR_LIM];
176 /* Table of non-printing characters. */
177 static int nonprinting[UCHAR_LIM];
179 /* Table of non-dictionary characters (not letters, digits, or blanks). */
180 static int nondictionary[UCHAR_LIM];
182 /* Translation table folding lower case to upper. */
183 static char fold_toupper[UCHAR_LIM];
185 /* Table mapping 3-letter month names to integers.
186 Alphabetic order allows binary search. */
187 static const struct month us_monthtab[] =
189 {"APR", 4},
190 {"AUG", 8},
191 {"DEC", 12},
192 {"FEB", 2},
193 {"JAN", 1},
194 {"JUL", 7},
195 {"JUN", 6},
196 {"MAR", 3},
197 {"MAY", 5},
198 {"NOV", 11},
199 {"OCT", 10},
200 {"SEP", 9}
203 #ifdef ENABLE_NLS
205 /* Locale may have a different idea of month names */
206 static struct month nls_monthtab[NLS_NUM_MONTHS];
207 static int nls_months_collide[NLS_NUM_MONTHS + 1];
209 /* Numeric keys, to search for numeric format */
210 struct nls_keyfield
212 struct keyfield *key;
213 struct nls_keyfield *next;
216 static struct nls_keyfield *nls_keyhead = NULL;
218 #endif
220 /* Which month table to use in the program, default C */
221 static const struct month *monthtab = us_monthtab;
223 /* During the merge phase, the number of files to merge at once. */
224 #define NMERGE 16
226 /* Initial buffer size for in core sorting. Will not grow unless a
227 line longer than this is seen. */
228 static int sortalloc = 512 * 1024;
230 /* Initial buffer size for in core merge buffers. Bear in mind that
231 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
232 static int mergealloc = 16 * 1024;
234 /* Guess of average line length. */
235 static int linelength = 30;
237 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
238 #define LINEALLOC (256 * 1024)
240 /* Prefix for temporary file names. */
241 static char *temp_file_prefix;
243 /* Flag to reverse the order of all comparisons. */
244 static int reverse;
246 /* Flag for stable sort. This turns off the last ditch bytewise
247 comparison of lines, and instead leaves lines in the same order
248 they were read if all keys compare equal. */
249 static int stable;
251 /* Tab character separating fields. If NUL, then fields are separated
252 by the empty string between a non-whitespace character and a whitespace
253 character. */
254 static char tab;
256 /* Flag to remove consecutive duplicate lines from the output.
257 Only the last of a sequence of equal lines will be output. */
258 static int unique;
260 /* Nonzero if any of the input files are the standard input. */
261 static int have_read_stdin;
263 /* Lists of key field comparisons to be tried. */
264 static struct keyfield keyhead;
266 static void
267 usage (int status)
269 if (status != 0)
270 fprintf (stderr, _("Try `%s --help' for more information.\n"),
271 program_name);
272 else
274 printf (_("\
275 Usage: %s [OPTION]... [FILE]...\n\
277 program_name);
278 printf (_("\
279 Write sorted concatenation of all FILE(s) to standard output.\n\
281 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
282 -b ignore leading blanks in sort fields or keys\n\
283 -c check if given files already sorted, do not sort\n\
284 -d consider only [a-zA-Z0-9 ] characters in keys\n\
285 -f fold lower case to upper case characters in keys\n\
286 -g compare according to general numerical value, imply -b\n\
287 -i consider only [\\040-\\0176] characters in keys\n\
288 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
289 -m merge already sorted files, do not sort\n\
290 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
291 -n compare according to string numerical value, imply -b\n\
292 -o FILE write result on FILE instead of standard output\n\
293 -r reverse the result of comparisons\n\
294 -s stabilize sort by disabling last resort comparison\n\
295 -t SEP use SEParator instead of non- to whitespace transition\n\
296 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
297 -u with -c, check for strict ordering;\n\
298 with -m, only output the first of an equal sequence\n\
299 -z end lines with 0 byte, not newline, for find -print0\n\
300 --help display this help and exit\n\
301 --version output version information and exit\n\
303 POS is F[.C][OPTS], where F is the field number and C the character\n\
304 position in the field, both counted from zero. OPTS is made up of one\n\
305 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
306 for that key. If no key given, use the entire line as key. With no\n\
307 FILE, or when FILE is -, read standard input.\n\
309 , DEFAULT_TMPDIR);
310 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
312 /* Don't use EXIT_FAILURE here in case it is defined to be 1.
313 POSIX requires that sort return 1 IFF invoked with -c and
314 the input is not properly sorted. */
315 assert (status == 0 || status == SORT_FAILURE);
316 exit (status);
319 /* The list of temporary files. */
320 static struct tempnode
322 char *name;
323 struct tempnode *next;
324 } temphead;
326 /* Clean up any remaining temporary files. */
328 static void
329 cleanup (void)
331 struct tempnode *node;
333 for (node = temphead.next; node; node = node->next)
334 unlink (node->name);
337 static FILE *
338 xtmpfopen (const char *file)
340 FILE *fp;
341 int fd;
343 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0600);
344 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
346 error (0, errno, "%s", file);
347 cleanup ();
348 exit (SORT_FAILURE);
351 return fp;
354 static FILE *
355 xfopen (const char *file, const char *how)
357 FILE *fp;
359 if (STREQ (file, "-"))
361 fp = stdin;
363 else
365 if ((fp = fopen (file, how)) == NULL)
367 error (0, errno, "%s", file);
368 cleanup ();
369 exit (SORT_FAILURE);
373 if (fp == stdin)
374 have_read_stdin = 1;
375 return fp;
378 static void
379 xfclose (FILE *fp)
381 if (fp == stdin)
383 /* Allow reading stdin from tty more than once. */
384 if (feof (fp))
385 clearerr (fp);
387 else if (fp == stdout)
389 if (fflush (fp) != 0)
391 error (0, errno, _("flushing file"));
392 cleanup ();
393 exit (SORT_FAILURE);
396 else
398 if (fclose (fp) != 0)
400 error (0, errno, _("error closing file"));
401 cleanup ();
402 exit (SORT_FAILURE);
407 static void
408 write_bytes (const char *buf, size_t n_bytes, FILE *fp)
410 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
412 error (0, errno, _("write error"));
413 cleanup ();
414 exit (SORT_FAILURE);
418 /* Return a name for a temporary file. */
420 static char *
421 tempname (void)
423 static unsigned int seq;
424 int len = strlen (temp_file_prefix);
425 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
426 struct tempnode *node;
428 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
429 sprintf (name,
430 "%s%ssort%5.5d%5.5d",
431 temp_file_prefix,
432 (len && temp_file_prefix[len - 1] != '/') ? "/" : "",
433 (unsigned int) getpid () & 0xffff, seq);
435 /* Make sure that SEQ's value fits in 5 digits. */
436 ++seq;
437 if (seq >= 100000)
438 seq = 0;
440 node->name = name;
441 node->next = temphead.next;
442 temphead.next = node;
443 return name;
446 /* Search through the list of temporary files for NAME;
447 remove it if it is found on the list. */
449 static void
450 zaptemp (const char *name)
452 struct tempnode *node, *temp;
454 for (node = &temphead; node->next; node = node->next)
455 if (STREQ (name, node->next->name))
456 break;
457 if (node->next)
459 temp = node->next;
460 unlink (temp->name);
461 free (temp->name);
462 node->next = temp->next;
463 free ((char *) temp);
467 #ifdef ENABLE_NLS
468 /* Initialize the character class tables. */
470 static int
471 nls_sort_month_comp (const void *m1, const void *m2)
473 return strcoll (((const struct month *) m1)->name,
474 ((const struct month *) m2)->name);
477 /* Do collation on strings S1 and S2, but for at most L characters.
478 we use the fact, that we KNOW that LEN is the min of the two lengths */
479 static int
480 strncoll (char *s1, char *s2, int len)
482 register int diff;
484 if (need_locale)
486 /* Emulate a strncoll function, by forcing strcoll to compare
487 only the first LEN characters in each string. */
488 register unsigned char n1 = s1[len];
489 register unsigned char n2 = s2[len];
491 s1[len] = s2[len] = 0;
492 diff = strcoll (s1, s2);
493 s1[len] = n1;
494 s2[len] = n2;
496 else
498 diff = memcmp (s1, s2, len);
501 return diff;
504 /* Do collation on strings S1 and S2, but for at most L characters.
505 Use the fact, that we KNOW that S2 is the shorter string and has
506 length LEN. */
507 static int
508 strncoll_s2_readonly (char *s1, const char *s2, int len)
510 register int diff;
512 assert (len == strlen (s2));
513 assert (len <= strlen (s1));
515 if (need_locale)
517 /* Emulate a strncoll function, by forcing strcoll to compare
518 only the first LEN characters in each string. */
519 register unsigned char n1 = s1[len];
521 s1[len] = 0;
522 diff = strcoll (s1, s2);
523 s1[len] = n1;
525 else
527 diff = memcmp (s1, s2, len);
530 return diff;
533 #endif /* NLS */
535 static void
536 inittables (void)
538 int i;
540 for (i = 0; i < UCHAR_LIM; ++i)
542 if (ISBLANK (i))
543 blanks[i] = 1;
544 if (!ISPRINT (i))
545 nonprinting[i] = 1;
546 if (!ISALNUM (i) && !ISBLANK (i))
547 nondictionary[i] = 1;
548 if (ISLOWER (i))
549 fold_toupper[i] = toupper (i);
550 else
551 fold_toupper[i] = i;
554 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
555 /* If We're not in the "C" locale, read in different names for months. */
556 if (need_locale)
558 nls_months_collide[0] = 1; /* if an error, look again */
559 for (i = 0; i < NLS_NUM_MONTHS; i++)
561 char *s;
562 size_t s_len;
563 int j;
565 s = (char *) nl_langinfo (ABMON_1 + us_monthtab[i].val - 1);
566 s_len = strlen (s);
567 nls_monthtab[i].name = (char *) xmalloc (s_len + 1);
568 nls_monthtab[i].val = us_monthtab[i].val;
570 /* Be careful: abreviated month names
571 may be longer than the usual 3 characters. */
572 for (j = 0; j < s_len; j++)
573 nls_monthtab[i].name[j] = fold_toupper[UCHAR (s[j])];
574 nls_monthtab[i].name[j] = '\0';
576 nls_months_collide[nls_monthtab[i].val] = 0;
577 for (j = 0; j < NLS_NUM_MONTHS; ++j)
579 if (STREQ (nls_monthtab[i].name, us_monthtab[i].name))
581 /* There are indeed some month names in English which
582 collide with the NLS name. */
583 nls_months_collide[nls_monthtab[i].val] = 1;
584 break;
588 /* Now quicksort the month table (should be sorted already!).
589 However, another locale doesn't rule out the possibility
590 of a different order of month names. */
591 qsort ((void *) nls_monthtab, NLS_NUM_MONTHS,
592 sizeof (struct month), nls_sort_month_comp);
593 monthtab = nls_monthtab;
595 #endif /* NLS */
598 /* Initialize BUF, allocating ALLOC bytes initially. */
600 static void
601 initbuf (struct buffer *buf, int alloc)
603 buf->alloc = alloc;
604 buf->buf = xmalloc (buf->alloc);
605 buf->used = buf->left = 0;
608 /* Fill BUF reading from FP, moving buf->left bytes from the end
609 of buf->buf to the beginning first. If EOF is reached and the
610 file wasn't terminated by a newline, supply one. Return a count
611 of bytes buffered. */
613 static int
614 fillbuf (struct buffer *buf, FILE *fp)
616 int cc;
618 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
619 buf->used = buf->left;
621 while (!feof (fp) && (buf->used == 0
622 || !memchr (buf->buf, eolchar, buf->used)))
624 if (buf->used == buf->alloc)
626 buf->alloc *= 2;
627 buf->buf = xrealloc (buf->buf, buf->alloc);
629 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
630 if (ferror (fp))
632 error (0, errno, _("read error"));
633 cleanup ();
634 exit (SORT_FAILURE);
636 buf->used += cc;
639 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
641 if (buf->used == buf->alloc)
643 buf->alloc *= 2;
644 buf->buf = xrealloc (buf->buf, buf->alloc);
646 buf->buf[buf->used++] = eolchar;
649 return buf->used;
652 /* Initialize LINES, allocating space for ALLOC lines initially.
653 LIMIT is the maximum possible number of lines to allocate space
654 for, ever. */
656 static void
657 initlines (struct lines *lines, int alloc, int limit)
659 lines->alloc = alloc;
660 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
661 lines->used = 0;
662 lines->limit = limit;
665 /* Return a pointer to the first character of the field specified
666 by KEY in LINE. */
668 static char *
669 begfield (const struct line *line, const struct keyfield *key)
671 register char *ptr = line->text, *lim = ptr + line->length;
672 register int sword = key->sword, schar = key->schar;
674 if (tab)
675 while (ptr < lim && sword--)
677 while (ptr < lim && *ptr != tab)
678 ++ptr;
679 if (ptr < lim)
680 ++ptr;
682 else
683 while (ptr < lim && sword--)
685 while (ptr < lim && blanks[UCHAR (*ptr)])
686 ++ptr;
687 while (ptr < lim && !blanks[UCHAR (*ptr)])
688 ++ptr;
691 if (key->skipsblanks)
692 while (ptr < lim && blanks[UCHAR (*ptr)])
693 ++ptr;
695 if (ptr + schar <= lim)
696 ptr += schar;
697 else
698 ptr = lim;
700 return ptr;
703 /* Return the limit of (a pointer to the first character after) the field
704 in LINE specified by KEY. */
706 static char *
707 limfield (const struct line *line, const struct keyfield *key)
709 register char *ptr = line->text, *lim = ptr + line->length;
710 register int eword = key->eword, echar = key->echar;
712 /* Note: from the POSIX spec:
713 The leading field separator itself is included in
714 a field when -t is not used. FIXME: move this comment up... */
716 /* Move PTR past EWORD fields or to one past the last byte on LINE,
717 whichever comes first. If there are more than EWORD fields, leave
718 PTR pointing at the beginning of the field having zero-based index,
719 EWORD. If a delimiter character was specified (via -t), then that
720 `beginning' is the first character following the delimiting TAB.
721 Otherwise, leave PTR pointing at the first `blank' character after
722 the preceding field. */
723 if (tab)
724 while (ptr < lim && eword--)
726 while (ptr < lim && *ptr != tab)
727 ++ptr;
728 if (ptr < lim && (eword || echar > 0))
729 ++ptr;
731 else
732 while (ptr < lim && eword--)
734 while (ptr < lim && blanks[UCHAR (*ptr)])
735 ++ptr;
736 while (ptr < lim && !blanks[UCHAR (*ptr)])
737 ++ptr;
740 #ifdef POSIX_UNSPECIFIED
741 /* The following block of code makes GNU sort incompatible with
742 standard Unix sort, so it's ifdef'd out for now.
743 The POSIX spec isn't clear on how to interpret this.
744 FIXME: request clarification.
746 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
747 Date: Thu, 30 May 96 12:20:41 -0400
749 [...]I believe I've found another bug in `sort'.
751 $ cat /tmp/sort.in
752 a b c 2 d
753 pq rs 1 t
754 $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
755 a b c 2 d
756 pq rs 1 t
757 $ /bin/sort +0.6 -0.7 </tmp/sort.in
758 pq rs 1 t
759 a b c 2 d
761 Unix sort produced the answer I expected: sort on the single character
762 in column 6. GNU sort produced different results, because it disagrees
763 on the interpretation of the key-end spec "-M.N". Unix sort reads this
764 as "skip M fields, then N characters"; but GNU sort wants it to mean
765 "skip M fields, then either N characters or the rest of the current
766 field, whichever comes first". This extra clause applies only to
767 key-ends, not key-starts.
770 /* Make LIM point to the end of (one byte past) the current field. */
771 if (tab)
773 char *newlim;
774 newlim = memchr (ptr, tab, lim - ptr);
775 if (newlim)
776 lim = newlim;
778 else
780 char *newlim;
781 newlim = ptr;
782 while (newlim < lim && blanks[UCHAR (*newlim)])
783 ++newlim;
784 while (newlim < lim && !blanks[UCHAR (*newlim)])
785 ++newlim;
786 lim = newlim;
788 #endif
790 /* If we're skipping leading blanks, don't start counting characters
791 until after skipping past any leading blanks. */
792 if (key->skipsblanks)
793 while (ptr < lim && blanks[UCHAR (*ptr)])
794 ++ptr;
796 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
797 if (ptr + echar <= lim)
798 ptr += echar;
799 else
800 ptr = lim;
802 return ptr;
805 /* FIXME */
807 void
808 trim_trailing_blanks (const char *a_start, char **a_end)
810 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
811 --(*a_end);
814 /* Find the lines in BUF, storing pointers and lengths in LINES.
815 Also replace newlines in BUF with NULs. */
817 static void
818 findlines (struct buffer *buf, struct lines *lines)
820 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
821 struct keyfield *key = keyhead.next;
823 lines->used = 0;
825 while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))
826 && lines->used < lines->limit)
828 /* There are various places in the code that rely on a NUL
829 being at the end of in-core lines; NULs inside the lines
830 will not cause trouble, though. */
831 *ptr = '\0';
833 if (lines->used == lines->alloc)
835 lines->alloc *= 2;
836 lines->lines = (struct line *)
837 xrealloc ((char *) lines->lines,
838 lines->alloc * sizeof (struct line));
841 lines->lines[lines->used].text = beg;
842 lines->lines[lines->used].length = ptr - beg;
844 /* Precompute the position of the first key for efficiency. */
845 if (key)
847 if (key->eword >= 0)
848 lines->lines[lines->used].keylim =
849 limfield (&lines->lines[lines->used], key);
850 else
851 lines->lines[lines->used].keylim = ptr;
853 if (key->sword >= 0)
854 lines->lines[lines->used].keybeg =
855 begfield (&lines->lines[lines->used], key);
856 else
858 if (key->skipsblanks)
859 while (blanks[UCHAR (*beg)])
860 ++beg;
861 lines->lines[lines->used].keybeg = beg;
863 if (key->skipeblanks)
865 trim_trailing_blanks (lines->lines[lines->used].keybeg,
866 &lines->lines[lines->used].keylim);
869 else
871 lines->lines[lines->used].keybeg = 0;
872 lines->lines[lines->used].keylim = 0;
875 ++lines->used;
876 beg = ptr + 1;
879 buf->left = lim - beg;
882 /* Compare strings A and B containing decimal fractions < 1. Each string
883 should begin with a decimal point followed immediately by the digits
884 of the fraction. Strings not of this form are considered to be zero. */
886 /* The goal here, is to take two numbers a and b... compare these
887 in parallel. Instead of converting each, and then comparing the
888 outcome. Most likely stopping the comparison before the conversion
889 is complete. The algorithm used, in the old sort:
891 Algorithm: fraccompare
892 Action : compare two decimal fractions
893 accepts : char *a, char *b
894 returns : -1 if a<b, 0 if a=b, 1 if a>b.
895 implement:
897 if *a == decimal_point AND *b == decimal_point
898 find first character different in a and b.
899 if both are digits, return the difference *a - *b.
900 if *a is a digit
901 skip past zeroes
902 if digit return 1, else 0
903 if *b is a digit
904 skip past zeroes
905 if digit return -1, else 0
906 if *a is a decimal_point
907 skip past decimal_point and zeroes
908 if digit return 1, else 0
909 if *b is a decimal_point
910 skip past decimal_point and zeroes
911 if digit return -1, else 0
912 return 0
914 The above implementation duplicates code, and thus there is room
915 for improvement:
916 the difference in code of a and b, is solved by using a
917 reference to s, assigned to either a or b. and using diff
918 to denote return value.
919 the difference in either that start being a digit or
920 the decimal point, is solved by testing if either is
921 a decimal point, or if the other is a digit...
923 if *a or *b is a decimal_point
924 skip all chars where *a == *b
925 if *a and *b are digits
926 return *a - *b
927 if *a is a digit or *a is a decimal_point
928 s is a
929 diff is 1
930 else
931 s is b
932 diff is -1
933 skip decimal_point in s
934 skip zeroes in s
935 if *s is a digit
936 return diff
937 return 0 */
939 #define USE_NEW_FRAC_COMPARE
940 #ifdef USE_NEW_FRAC_COMPARE
942 static int
943 fraccompare (register const char *a, register const char *b)
945 # ifdef ENABLE_NLS
946 nls_fraction_found = 1;
947 # endif
949 if (*a == decimal_point || *b == decimal_point)
951 register const char *s;
952 int diff;
954 while (*a == *b)
956 ++a;
957 ++b;
958 if (!ISDIGIT (*a))
959 break;
962 if (ISDIGIT (*a) && ISDIGIT (*b))
963 return (*a) - (*b);
965 if (*a == decimal_point || (ISDIGIT (*a) && *b != decimal_point))
967 s = a;
968 diff = 1;
970 else
972 s = b;
973 diff = -1;
975 if (*s == decimal_point)
976 ++s;
977 while (*s == NUMERIC_ZERO)
978 ++s;
979 if (ISDIGIT (*s))
980 return diff;
982 return 0;
985 #else
986 static int
987 fraccompare (register const char *a, register const char *b)
989 register int tmpa = *a;
990 register int tmpb = *b;
992 if (tmpa == decimal_point && tmpb == decimal_point)
995 tmpa = *++a, tmpb = *++b;
996 while (tmpa == tmpb && ISDIGIT (tmpa));
997 if (ISDIGIT (tmpa) && ISDIGIT (tmpb))
998 return tmpa - tmpb;
999 if (ISDIGIT (tmpa))
1001 while (tmpa == NUMERIC_ZERO)
1002 tmpa = *++a;
1003 if (ISDIGIT (tmpa))
1004 return 1;
1005 return 0;
1007 if (ISDIGIT (tmpb))
1009 while (tmpb == NUMERIC_ZERO)
1010 tmpb = *++b;
1011 if (ISDIGIT (tmpb))
1012 return -1;
1013 return 0;
1015 return 0;
1017 else if (tmpa == decimal_point)
1020 tmpa = *++a;
1021 while (tmpa == NUMERIC_ZERO);
1022 if (ISDIGIT (tmpa))
1023 return 1;
1024 return 0;
1026 else if (tmpb == decimal_point)
1029 tmpb = *++b;
1030 while (tmpb == NUMERIC_ZERO);
1031 if (ISDIGIT (tmpb))
1032 return -1;
1033 return 0;
1035 return 0;
1037 #endif
1039 /* Compare strings A and B as numbers without explicitly converting them to
1040 machine numbers. Comparatively slow for short strings, but asymptotically
1041 hideously fast. */
1043 /* The code here, is like the above... continuous reoccurrance of the
1044 same code... improved 15-JAN-1997 in connection with native languages
1045 support */
1047 #ifdef ENABLE_NLS
1049 /* Decide the kind of fraction the program will use */
1050 static void
1051 nls_set_fraction (register unsigned char ch)
1053 if (!nls_fraction_found && ch != decimal_point)
1055 if (ch == FLOATING_POINT)
1056 { /* US style */
1057 decimal_point = FLOATING_POINT;
1058 th_sep = FLOATING_COMMA;
1060 else if (ch == FLOATING_COMMA)
1061 { /* EU style */
1062 decimal_point = FLOATING_COMMA;
1063 th_sep = FLOATING_POINT;
1065 else if (ch != decimal_point)
1066 { /* Alien */
1067 decimal_point = ch;
1068 th_sep = '\0';
1071 nls_fraction_found = 1;
1074 /* Look for a fraction
1075 It isn't as simple as it looks... however, consider a number:
1076 1.234,00
1077 1,234.00
1078 It's easy to tell which is a decimal point, and which isn't. We use
1079 the grouping information to find out how many digits are grouped together
1080 for thousand separator.
1082 The idea here, is to use the grouping information... but not to
1083 spend time with verifying the groups... not too much time, anyway.
1084 so, a number represented to us as:
1085 1.234.567,89
1086 will be taken and separated into different groups, separated by a
1087 separator character (Decimal point or thousands separator).
1088 {1,234,567}
1089 these are the groups of digits that lead to a separator character,
1090 and with the trailing group is added:
1091 {1,234,567,89}
1092 resulting in 4 groups of numbers. If the resulting number of groups,
1093 are none, or just 1... this is not enough to decide anything about
1094 the decimal point. We need at least two for that. With two groups
1095 we have at least one separator. That separator can be a decimal
1096 point, or a thousands separator... if it is a thousands separator
1097 the number of digits in the last group, will comply with the first
1098 rule in the grouping rule for numeric values. i.e.
1099 |{89}| = grouping[0]
1100 if so, and there are only two groups of numbers, the value cannot
1101 be determined. If there are three or more numbers, the separator
1102 separating the groups is checked. If these are the same, the
1103 character is determined to be a thousands separator. If they are
1104 not the same, the last separator is determined to be a decimal
1105 point. If checking the grouping rules, we find out that there
1106 are no grouping rules defined, either the grouping rules is NULL
1107 or the first grouping number is 0, then the locale format is used.
1109 We try to take an advantage of a special situation. If the trailing
1110 group, the one that normally should be the fractional part, turns
1111 out to have the same length as the thousands separator rule says,
1112 making a doubt on that it may be a decimal point, we look for the
1113 group before that, i.e. with a two group form:
1114 {1234,567}
1115 where the grouping rule is 3;3... we take a look at group 1, and find
1116 out that |{1234}| > larger of the two first grouping rules, then
1117 the separator has to be a decimal point...
1120 static void
1121 look_for_fraction (const char *s, const char *e)
1123 register const char *p;
1124 register unsigned short n = 0;
1125 static unsigned short max_groups = 0;
1126 static unsigned short *groups = NULL;
1128 if (groups == NULL)
1130 max_groups = NLS_MAX_GROUPS;
1131 groups = (unsigned short *) xmalloc (sizeof (*groups) * max_groups);
1134 /* skip blanks and signs */
1135 while (blanks[UCHAR (*s)] || *s == NEGATION_SIGN)
1136 s++;
1137 /* groups = {}, n = 0 */
1138 for (p = s; p < e; p++)
1140 /* groups[n]={number of digits leading to separator n}
1141 n = number of separators so far */
1142 if (*p == decimal_point || *p == th_sep || *p == FLOATING_POINT)
1144 if (++n >= max_groups)
1146 /* BIG Number... enlarge table */
1147 max_groups += NLS_MAX_GROUPS;
1148 groups = (unsigned short *) xrealloc ((char *) groups,
1149 (sizeof (*groups)
1150 * max_groups));
1152 groups[n] = (unsigned short) (p - s);
1153 s = p + 1;
1155 else if (!ISDIGIT (*p))
1156 break;
1157 /* mem[s..p]=digits only */
1159 /* n = number of separators in s..e */
1160 groups[++n] = (short) (p - s);
1161 /* n = groups in the number */
1162 if (n <= 1)
1163 return; /* Only one group of numbers... not enough */
1164 p = nls_grouping;
1165 /* p = address of group rules
1166 s = address of next character after separator */
1167 s = s - 1; /* s = address of last separator */
1168 if (p && *p)
1170 /* a legal trailing group, iff groups[n] == first rule */
1171 if (groups[n] != (short) *p)
1172 nls_set_fraction (*s);
1173 else
1175 if (n == 2)
1176 { /* Only two groups */
1177 if (groups[n - 1] > max (p[0], p[1]))
1178 nls_set_fraction (*s);
1179 return;
1181 /* if the separators are the same, it's a thousands */
1182 if (*s != *(s - groups[n]))
1183 nls_set_fraction (*s);
1184 /* s[0] = thousands separator */
1185 else if (*s == th_sep)
1186 nls_fraction_found = 1;
1189 else
1191 /* no grouping allowed here, last separator IS decimal point */
1192 nls_set_fraction (*s);
1196 static int
1197 numcompare (register const char *a, register const char *b)
1199 int ret_code = 1; /* normal return status, see later in code */
1200 int diff = 0; /* difference between two digits */
1202 while (blanks[UCHAR (*a)])
1203 ++a;
1204 while (blanks[UCHAR (*b)])
1205 ++b;
1207 /* next character in a,b is non-blank */
1208 if ((*a == NEGATION_SIGN || *b == NEGATION_SIGN) && *a != *b)
1210 /* a < 0, or b < 0, but not both */
1211 if (*a == NEGATION_SIGN)
1212 ret_code = -1, ++a; /* a looks < b */
1213 else if (*b == NEGATION_SIGN)
1214 ret_code = 1, ++b; /* b looks < a */
1215 /* bypass zeroes, decimal points, and thousand sep in a & b */
1216 while (*a == NUMERIC_ZERO || (th_sep && *a == th_sep)
1217 || *a == decimal_point)
1218 ++a;
1220 while (*b == NUMERIC_ZERO || (th_sep && *b == th_sep)
1221 || *b == decimal_point)
1222 ++b;
1224 if (ISDIGIT (*a) || ISDIGIT (*b))
1225 /* here, either a or b or both are digits
1226 if a and b are digits, the signed one is the lesser.
1227 if a is a digit, and not b.. it means b==0, and if b==0
1228 than either is signed if b is signed then -0 < a
1229 or if a is signed then -a < 0. The ret_code is already set
1230 to mark that the signed number is the lesser, so we just
1231 return that number here. */
1232 return ret_code;
1234 /* *a and *b are neither digits, they are equal -0 == +0 */
1235 return 0;
1237 else
1239 /* either both numbers are signed, or both are not-signed */
1240 if (*a == NEGATION_SIGN)
1242 ++a;
1243 ++b;
1244 ret_code = -1;
1246 /* if both are signed, then remember -100 < -10 (ret_code reversed!) */
1248 /* Skip any leading zeroes */
1249 while (*a == NUMERIC_ZERO)
1250 ++a;
1251 while (*b == NUMERIC_ZERO)
1252 ++b;
1254 continue_thousands:
1256 /* skip all equal digits */
1257 while (ISDIGIT (*a) && ISDIGIT (*b) && *a == *b)
1258 a++, b++;
1260 /* Here, we have either different digits, or possible fractions
1261 or thousand separators. */
1263 if (ISDIGIT (*a) && ISDIGIT (*b))
1265 if (diff == 0)
1266 diff = ((*a) - (*b)); /* simple, isn't it? not quite */
1267 a++, b++;
1268 goto continue_thousands;
1271 /* now, here either may be a fraction, or a thousand separator...
1272 or both. */
1273 /* We've decided what are decimal_points, and what are thousands sep */
1274 if ((th_sep != 0) && (*a == th_sep || *b == th_sep))
1276 if (*a == th_sep)
1277 ++a;
1278 if (*b == th_sep)
1279 ++b;
1280 goto continue_thousands; /* Ugly, but better than a while(1) */
1283 if (ISDIGIT (*a))
1284 return ret_code; /* a has more digits than b */
1285 if (ISDIGIT (*b))
1286 return ret_code * -1; /* b has more digits than a */
1288 /* now, we should have the fractions solved */
1289 if ((diff == 0) && (*a == decimal_point || *b == decimal_point))
1290 return ret_code * fraccompare (a, b);
1292 return diff; /* fall through here, and diff decides */
1295 #else
1296 static int
1297 numcompare (register const char *a, register const char *b)
1299 register int tmpa, tmpb, loga, logb, tmp;
1301 tmpa = UCHAR (*a);
1302 tmpb = UCHAR (*b);
1304 while (blanks[tmpa])
1305 tmpa = UCHAR (*++a);
1306 while (blanks[tmpb])
1307 tmpb = UCHAR (*++b);
1309 if (tmpa == NEGATION_SIGN)
1312 tmpa = UCHAR (*++a);
1313 while (tmpa == NUMERIC_ZERO);
1314 if (tmpb != NEGATION_SIGN)
1316 if (tmpa == decimal_point)
1318 tmpa = *++a;
1319 while (tmpa == NUMERIC_ZERO);
1320 if (ISDIGIT (tmpa))
1321 return -1;
1322 while (tmpb == NUMERIC_ZERO)
1323 tmpb = UCHAR (*++b);
1324 if (tmpb == decimal_point)
1326 tmpb = *++b;
1327 while (tmpb == NUMERIC_ZERO);
1328 if (ISDIGIT (tmpb))
1329 return -1;
1330 return 0;
1333 tmpb = UCHAR (*++b);
1334 while (tmpb == NUMERIC_ZERO);
1336 while (tmpa == tmpb && ISDIGIT (tmpa))
1337 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
1339 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1340 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1341 return -fraccompare (a, b);
1343 if (ISDIGIT (tmpa))
1344 for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga)
1346 else
1347 loga = 0;
1349 if (ISDIGIT (tmpb))
1350 for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb)
1352 else
1353 logb = 0;
1355 if ((tmp = logb - loga) != 0)
1356 return tmp;
1358 if (!loga)
1359 return 0;
1361 return tmpb - tmpa;
1363 else if (tmpb == NEGATION_SIGN)
1366 tmpb = UCHAR (*++b);
1367 while (tmpb == NUMERIC_ZERO);
1368 if (tmpb == decimal_point)
1370 tmpb = *++b;
1371 while (tmpb == NUMERIC_ZERO);
1372 if (ISDIGIT (tmpb))
1373 return 1;
1374 while (tmpa == NUMERIC_ZERO)
1375 tmpa = UCHAR (*++a);
1376 if (tmpa == decimal_point)
1378 tmpa = UCHAR (*++a);
1379 while (tmpa == NUMERIC_ZERO);
1380 if (ISDIGIT (tmpa))
1381 return 1;
1382 return 0;
1384 else
1386 while (tmpa == NUMERIC_ZERO)
1387 tmpa = UCHAR (*++a);
1388 while (tmpb == NUMERIC_ZERO)
1389 tmpb = UCHAR (*++b);
1391 while (tmpa == tmpb && ISDIGIT (tmpa))
1392 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
1394 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1395 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1396 return fraccompare (a, b);
1398 if (ISDIGIT (tmpa))
1399 for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga)
1401 else
1402 loga = 0;
1404 if (ISDIGIT (tmpb))
1405 for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb)
1407 else
1408 logb = 0;
1410 if ((tmp = loga - logb) != 0)
1411 return tmp;
1413 if (!loga)
1414 return 0;
1416 return tmpa - tmpb;
1419 #endif
1421 static int
1422 general_numcompare (const char *sa, const char *sb)
1424 double a, b;
1425 /* FIXME: add option to warn about failed conversions. */
1426 /* FIXME: maybe add option to try expensive FP conversion
1427 only if A and B can't be compared more cheaply/accurately. */
1428 if (xstrtod (sa, NULL, &a))
1430 a = 0;
1432 if (xstrtod (sb, NULL, &b))
1434 b = 0;
1436 return a == b ? 0 : a < b ? -1 : 1;
1439 /* Return an integer in 1..12 of the month name S with length LEN.
1440 Return 0 if the name in S is not recognized. */
1442 static int
1443 getmonth (const char *s, int len)
1445 char *month;
1446 register int i, lo = 0, hi = 12, result;
1448 while (len > 0 && blanks[UCHAR (*s)])
1450 ++s;
1451 --len;
1454 if (len == 0)
1455 return 0;
1457 month = (char *) alloca (len + 1);
1458 for (i = 0; i < len; ++i)
1459 month[i] = fold_toupper[UCHAR (s[i])];
1460 while (blanks[UCHAR (month[i - 1])])
1461 --i;
1462 month[i] = '\0';
1466 int ix = (lo + hi) / 2;
1468 len = strlen (monthtab[ix].name);
1469 if (NLS_STRNCMP (month, monthtab[ix].name, len) < 0)
1470 hi = ix;
1471 else
1472 lo = ix;
1474 while (hi - lo > 1);
1476 result = (!strncmp (month, monthtab[lo].name, len) ? monthtab[lo].val : 0);
1478 return result;
1481 #ifdef ENABLE_NLS
1482 /* Look for the month in locale table, and if that fails try with
1483 us month name table */
1484 static int
1485 nls_month_is_either_locale (const char *s, int len)
1487 int ind;
1489 monthtab = nls_monthtab;
1490 ind = getmonth (s, len);
1491 if (ind == 0)
1493 monthtab = us_monthtab;
1494 ind = getmonth (s, len);
1496 return ind;
1498 #endif
1500 /* Compare two lines A and B trying every key in sequence until there
1501 are no more keys or a difference is found. */
1503 static int
1504 keycompare (const struct line *a, const struct line *b)
1506 register char *texta, *textb, *lima, *limb;
1507 register unsigned char *translate;
1508 register int *ignore;
1509 struct keyfield *key;
1510 int diff = 0, iter = 0, lena, lenb;
1512 for (key = keyhead.next; key; key = key->next, ++iter)
1514 int comparable_lengths = 1;
1516 ignore = key->ignore;
1517 translate = (unsigned char *) key->translate;
1519 /* Find the beginning and limit of each field. */
1520 if (iter || a->keybeg == NULL || b->keybeg == NULL)
1522 if (key->eword >= 0)
1523 lima = limfield (a, key), limb = limfield (b, key);
1524 else
1525 lima = a->text + a->length, limb = b->text + b->length;
1527 if (key->sword >= 0)
1528 texta = begfield (a, key), textb = begfield (b, key);
1529 else
1531 texta = a->text, textb = b->text;
1532 if (key->skipsblanks)
1534 while (texta < lima && blanks[UCHAR (*texta)])
1535 ++texta;
1536 while (textb < limb && blanks[UCHAR (*textb)])
1537 ++textb;
1541 else
1543 /* For the first iteration only, the key positions have
1544 been precomputed for us. */
1545 texta = a->keybeg, lima = a->keylim;
1546 textb = b->keybeg, limb = b->keylim;
1549 /* Find the lengths. */
1550 lena = lima - texta, lenb = limb - textb;
1551 if (lena < 0)
1552 lena = 0;
1553 if (lenb < 0)
1554 lenb = 0;
1556 if (key->skipeblanks)
1558 char *a_end = texta + lena;
1559 char *b_end = textb + lenb;
1560 trim_trailing_blanks (texta, &a_end);
1561 trim_trailing_blanks (textb, &b_end);
1562 lena = a_end - texta;
1563 lenb = b_end - textb;
1566 /* Actually compare the fields. */
1567 if (key->numeric)
1569 if (*lima || *limb)
1571 char savea = *lima, saveb = *limb;
1573 *lima = *limb = '\0';
1574 diff = numcompare (texta, textb);
1575 *lima = savea, *limb = saveb;
1577 else
1578 diff = numcompare (texta, textb);
1580 if (diff)
1581 return key->reverse ? -diff : diff;
1582 continue;
1584 else if (key->general_numeric)
1586 if (*lima || *limb)
1588 char savea = *lima, saveb = *limb;
1590 *lima = *limb = '\0';
1591 diff = general_numcompare (texta, textb);
1592 *lima = savea, *limb = saveb;
1594 else
1595 diff = general_numcompare (texta, textb);
1597 if (diff)
1598 return key->reverse ? -diff : diff;
1599 continue;
1601 else if (key->month)
1603 #ifdef ENABLE_NLS
1605 /* if we haven't decided which locale to go with, we get the
1606 month name from either. If either month name is fully
1607 solved and the month name doesn't collide with the other
1608 locale... then use that table from there forward */
1609 if (!nls_month_found)
1611 int x;
1613 x = nls_month_is_either_locale (texta, lena);
1614 nls_month_found = !nls_months_collide[x];
1615 if (nls_month_found)
1617 diff = x - getmonth (textb, lenb);
1619 else
1621 diff = nls_month_is_either_locale (textb, lenb);
1622 nls_month_found = !nls_months_collide[diff];
1623 diff = x - diff;
1626 else
1627 #endif
1628 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1629 if (diff)
1630 return key->reverse ? -diff : diff;
1631 continue;
1633 #ifdef ENABLE_NLS
1635 /* Sorting like this may become slow, so in a simple locale the user
1636 can select a faster sort that is similar to ascii sort */
1637 else if (need_locale)
1639 /* FIXME: consider making parameters non-const, then when
1640 both ignore and translate are NULL (which should be most
1641 of the time) we could temporarily NUL-terminate them in
1642 place and avoid the copy. */
1644 char *copy_a = (char *) alloca (lena + 1);
1645 char *copy_b = (char *) alloca (lenb + 1);
1646 int new_len_a, new_len_b, i;
1648 /* We can't use strcoll directly on the two strings,
1649 but rather must extract the text for the key
1650 (to NUL-terminate for strcoll) and handle any
1651 'ignore' and/or 'translate' before comparing. */
1652 for (new_len_a = new_len_b = i = 0; i < max (lena, lenb); i++)
1654 if (i < lena)
1656 copy_a[new_len_a] = (translate
1657 ? translate[UCHAR (texta[i])]
1658 : texta[i]);
1659 if (!ignore || !ignore[UCHAR (texta[i])])
1660 ++new_len_a;
1662 if (i < lenb)
1664 copy_b[new_len_b] = (translate
1665 ? translate[UCHAR (textb[i])]
1666 : textb [i]);
1667 if (!ignore || !ignore[UCHAR (textb[i])])
1668 ++new_len_b;
1672 copy_a[new_len_a] = copy_b[new_len_b] = 0;
1674 diff = strcoll (copy_a, copy_b);
1676 /* Free copy_a and copy_b. */
1677 alloca (0);
1679 if (diff)
1680 return key->reverse ? -diff : diff;
1682 continue;
1684 #endif
1685 else if (ignore && translate)
1687 #define CMP_WITH_IGNORE(A, B) \
1688 do \
1690 while (texta < lima && textb < limb) \
1692 while (texta < lima && ignore[UCHAR (*texta)]) \
1693 ++texta; \
1694 while (textb < limb && ignore[UCHAR (*textb)]) \
1695 ++textb; \
1696 if (texta < lima && textb < limb) \
1698 if ((A) != (B)) \
1700 diff = UCHAR (A) - UCHAR (B); \
1701 break; \
1703 ++texta; \
1704 ++textb; \
1707 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1708 diff = -1; \
1709 else if (texta < lima && textb == limb \
1710 && !ignore[UCHAR (*texta)]) \
1711 diff = 1; \
1714 if (diff == 0) \
1716 while (texta < lima && ignore[UCHAR (*texta)]) \
1717 ++texta; \
1718 while (textb < limb && ignore[UCHAR (*textb)]) \
1719 ++textb; \
1721 if (texta == lima && textb < limb) \
1722 diff = -1; \
1723 else if (texta < lima && textb == limb) \
1724 diff = 1; \
1726 /* Relative lengths are meaningless if characters were ignored. \
1727 Handling this case here avoids what might be an invalid length \
1728 comparison below. */ \
1729 if (diff == 0 && texta == lima && textb == limb) \
1730 comparable_lengths = 0; \
1732 while (0)
1734 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1735 else if (ignore)
1736 CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1737 else if (translate)
1738 while (texta < lima && textb < limb)
1740 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1742 diff = (UCHAR (translate[UCHAR (*--texta)])
1743 - UCHAR (translate[UCHAR (*--textb)]));
1744 break;
1747 else
1749 diff = NLS_MEMCMP (texta, textb, min (lena, lenb));
1752 if (diff)
1753 return key->reverse ? -diff : diff;
1754 if (comparable_lengths && (diff = lena - lenb) != 0)
1755 return key->reverse ? -diff : diff;
1758 return 0;
1761 /* Compare two lines A and B, returning negative, zero, or positive
1762 depending on whether A compares less than, equal to, or greater than B. */
1764 static int
1765 compare (register const struct line *a, register const struct line *b)
1767 int diff, tmpa, tmpb, mini;
1769 /* First try to compare on the specified keys (if any).
1770 The only two cases with no key at all are unadorned sort,
1771 and unadorned sort -r. */
1772 if (keyhead.next)
1774 diff = keycompare (a, b);
1775 if (diff != 0)
1776 return diff;
1777 if (unique || stable)
1778 return 0;
1781 /* If the keys all compare equal (or no keys were specified)
1782 fall through to the default byte-by-byte comparison. */
1783 tmpa = a->length, tmpb = b->length;
1784 mini = min (tmpa, tmpb);
1785 if (mini == 0)
1786 diff = tmpa - tmpb;
1787 else
1789 char *ap = a->text, *bp = b->text;
1791 #ifdef ENABLE_NLS
1792 if (need_locale) /* want absolutely correct sorting */
1794 diff = strcoll (ap, bp);
1795 return reverse ? -diff : diff;
1797 #endif
1798 diff = UCHAR (*ap) - UCHAR (*bp);
1799 if (diff == 0)
1801 diff = NLS_MEMCMP (ap, bp, mini);
1802 if (diff == 0)
1803 diff = tmpa - tmpb;
1807 return reverse ? -diff : diff;
1810 /* Check that the lines read from the given FP come in order. Print a
1811 diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1812 the line number of the first out-of-order line (counting from 1) if they
1813 are not in order. Otherwise, print no diagnostic and return zero. */
1815 static int
1816 checkfp (FILE *fp, const char *file_name)
1818 struct buffer buf; /* Input buffer. */
1819 struct lines lines; /* Lines scanned from the buffer. */
1820 struct line temp; /* Copy of previous line. */
1821 int cc; /* Character count. */
1822 int alloc;
1823 int line_number = 1;
1824 struct line *disorder_line;
1825 int disorder_line_number = 0;
1827 #ifdef lint /* Suppress `used before initialized' warning. */
1828 disorder_line = NULL;
1829 #endif
1831 initbuf (&buf, mergealloc);
1832 initlines (&lines, mergealloc / linelength + 1,
1833 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1834 alloc = linelength;
1835 temp.text = xmalloc (alloc);
1837 cc = fillbuf (&buf, fp);
1838 if (cc == 0)
1839 goto finish;
1841 findlines (&buf, &lines);
1843 while (1)
1845 struct line *prev_line; /* Pointer to previous line. */
1846 int cmp; /* Result of calling compare. */
1847 int i;
1849 /* Compare each line in the buffer with its successor. */
1850 for (i = 0; i < lines.used - 1; ++i)
1852 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1853 if ((unique && cmp >= 0) || (cmp > 0))
1855 disorder_line = &lines.lines[i + 1];
1856 disorder_line_number = line_number + i + 1;
1857 goto finish;
1861 line_number += lines.used;
1863 /* Save the last line of the buffer and refill the buffer. */
1864 prev_line = lines.lines + (lines.used - 1);
1865 if (prev_line->length + 1 > alloc)
1869 alloc *= 2;
1871 while (alloc < prev_line->length + 1);
1872 temp.text = xrealloc (temp.text, alloc);
1874 assert (prev_line->length + 1 <= alloc);
1875 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1876 temp.length = prev_line->length;
1877 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1878 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1880 cc = fillbuf (&buf, fp);
1881 if (cc == 0)
1882 break;
1884 findlines (&buf, &lines);
1885 /* Make sure the line saved from the old buffer contents is
1886 less than or equal to the first line of the new buffer. */
1887 cmp = compare (&temp, &lines.lines[0]);
1888 if ((unique && cmp >= 0) || (cmp > 0))
1890 disorder_line = &lines.lines[0];
1891 disorder_line_number = line_number;
1892 break;
1896 finish:
1897 xfclose (fp);
1899 if (disorder_line_number)
1901 fprintf (stderr, _("%s: %s:%d: disorder: "), program_name, file_name,
1902 disorder_line_number);
1903 write_bytes (disorder_line->text, disorder_line->length, stderr);
1904 putc (eolchar, stderr);
1907 free (buf.buf);
1908 free ((char *) lines.lines);
1909 free (temp.text);
1910 return disorder_line_number;
1913 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1914 Close FPS before returning. */
1916 static void
1917 mergefps (FILE **fps, register int nfps, FILE *ofp)
1919 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1920 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1921 struct line saved; /* Saved line for unique check. */
1922 int savedflag = 0; /* True if there is a saved line. */
1923 int savealloc; /* Size allocated for the saved line. */
1924 int cur[NMERGE]; /* Current line in each line table. */
1925 int ord[NMERGE]; /* Table representing a permutation of fps,
1926 such that lines[ord[0]].lines[cur[ord[0]]]
1927 is the smallest line and will be next
1928 output. */
1929 register int i, j, t;
1931 #ifdef lint /* Suppress `used before initialized' warning. */
1932 savealloc = 0;
1933 #endif
1935 /* Allocate space for a saved line if necessary. */
1936 if (unique)
1938 savealloc = linelength;
1939 saved.text = xmalloc (savealloc);
1942 /* Read initial lines from each input file. */
1943 for (i = 0; i < nfps; ++i)
1945 initbuf (&buffer[i], mergealloc);
1946 /* If a file is empty, eliminate it from future consideration. */
1947 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1949 xfclose (fps[i]);
1950 --nfps;
1951 for (j = i; j < nfps; ++j)
1952 fps[j] = fps[j + 1];
1954 if (i == nfps)
1955 free (buffer[i].buf);
1956 else
1958 initlines (&lines[i], mergealloc / linelength + 1,
1959 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1960 findlines (&buffer[i], &lines[i]);
1961 cur[i] = 0;
1965 /* Set up the ord table according to comparisons among input lines.
1966 Since this only reorders two items if one is strictly greater than
1967 the other, it is stable. */
1968 for (i = 0; i < nfps; ++i)
1969 ord[i] = i;
1970 for (i = 1; i < nfps; ++i)
1971 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1972 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1973 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1975 /* Repeatedly output the smallest line until no input remains. */
1976 while (nfps)
1978 /* If uniqified output is turned on, output only the first of
1979 an identical series of lines. */
1980 if (unique)
1982 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1984 write_bytes (saved.text, saved.length, ofp);
1985 putc (eolchar, ofp);
1986 savedflag = 0;
1988 if (!savedflag)
1990 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1992 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1993 savealloc *= 2;
1994 saved.text = xrealloc (saved.text, savealloc);
1996 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1997 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1998 saved.length + 1);
1999 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
2001 saved.keybeg = saved.text +
2002 (lines[ord[0]].lines[cur[ord[0]]].keybeg
2003 - lines[ord[0]].lines[cur[ord[0]]].text);
2005 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
2007 saved.keylim = saved.text +
2008 (lines[ord[0]].lines[cur[ord[0]]].keylim
2009 - lines[ord[0]].lines[cur[ord[0]]].text);
2011 savedflag = 1;
2014 else
2016 write_bytes (lines[ord[0]].lines[cur[ord[0]]].text,
2017 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
2018 putc (eolchar, ofp);
2021 /* Check if we need to read more lines into core. */
2022 if (++cur[ord[0]] == lines[ord[0]].used)
2024 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
2026 findlines (&buffer[ord[0]], &lines[ord[0]]);
2027 cur[ord[0]] = 0;
2029 else
2031 /* We reached EOF on fps[ord[0]]. */
2032 for (i = 1; i < nfps; ++i)
2033 if (ord[i] > ord[0])
2034 --ord[i];
2035 --nfps;
2036 xfclose (fps[ord[0]]);
2037 free (buffer[ord[0]].buf);
2038 free ((char *) lines[ord[0]].lines);
2039 for (i = ord[0]; i < nfps; ++i)
2041 fps[i] = fps[i + 1];
2042 buffer[i] = buffer[i + 1];
2043 lines[i] = lines[i + 1];
2044 cur[i] = cur[i + 1];
2046 for (i = 0; i < nfps; ++i)
2047 ord[i] = ord[i + 1];
2048 continue;
2052 /* The new line just read in may be larger than other lines
2053 already in core; push it back in the queue until we encounter
2054 a line larger than it. */
2055 for (i = 1; i < nfps; ++i)
2057 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
2058 &lines[ord[i]].lines[cur[ord[i]]]);
2059 if (!t)
2060 t = ord[0] - ord[i];
2061 if (t < 0)
2062 break;
2064 t = ord[0];
2065 for (j = 1; j < i; ++j)
2066 ord[j - 1] = ord[j];
2067 ord[i - 1] = t;
2070 if (unique && savedflag)
2072 write_bytes (saved.text, saved.length, ofp);
2073 putc (eolchar, ofp);
2074 free (saved.text);
2078 #ifdef ENABLE_NLS
2080 /* Find the numeric format that this file represents to us for sorting. */
2081 static void
2082 nls_numeric_format (const struct line *line, int nlines)
2084 struct nls_keyfield *n_key = nls_keyhead;
2086 /* line = first line, nlines = number of lines,
2087 nls_fraction_found = false */
2088 for (; !nls_fraction_found && nlines > 0; line++, nlines--)
2090 int iter;
2091 for (iter = 0; !nls_fraction_found; iter++)
2093 char *text;
2094 char *lim;
2095 struct keyfield *key = n_key->key;
2097 /* text = {}, lim = {}, key = first key */
2098 if (iter || line->keybeg == NULL)
2100 /* Succeding keys, where the key field is
2101 specified */
2102 if (key->eword >= 0) /* key->eword = length of key */
2103 lim = limfield (line, key);
2104 else
2105 lim = line->text + line->length;
2106 /* lim = end of key field */
2108 if (key->sword >= 0) /* key->sword = start of key */
2109 text = begfield (line, key);
2110 else
2111 text = line->text;
2112 /* text = start of field */
2114 else
2116 /* First key is always the whole line */
2117 text = line->keybeg;
2118 lim = line->keylim;
2120 /* text = start of text to sort
2121 lim = end of text to sort */
2123 look_for_fraction (text, lim);
2125 /* nls_fraction_found = decimal_point found? */
2127 if ((n_key = n_key->next) == nls_keyhead)
2128 break; /* No more keys for this line */
2131 nls_fraction_found = 1;
2132 /* decide on current decimal_point known */
2135 #endif
2137 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
2139 static void
2140 sortlines (struct line *lines, int nlines, struct line *temp)
2142 register struct line *lo, *hi, *t;
2143 register int nlo, nhi;
2145 if (nlines == 2)
2147 if (compare (&lines[0], &lines[1]) > 0)
2149 *temp = lines[0];
2150 lines[0] = lines[1];
2151 lines[1] = *temp;
2153 return;
2156 nlo = nlines / 2;
2157 lo = lines;
2158 nhi = nlines - nlo;
2159 hi = lines + nlo;
2161 if (nlo > 1)
2162 sortlines (lo, nlo, temp);
2164 if (nhi > 1)
2165 sortlines (hi, nhi, temp);
2167 t = temp;
2169 while (nlo && nhi)
2170 if (compare (lo, hi) <= 0)
2171 *t++ = *lo++, --nlo;
2172 else
2173 *t++ = *hi++, --nhi;
2174 while (nlo--)
2175 *t++ = *lo++;
2177 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
2178 *lo++ = *t++;
2181 /* Check that each of the NFILES FILES is ordered.
2182 Return a count of disordered files. */
2184 static int
2185 check (char **files, int nfiles)
2187 int i, disorders = 0;
2188 FILE *fp;
2190 for (i = 0; i < nfiles; ++i)
2192 fp = xfopen (files[i], "r");
2193 if (checkfp (fp, files[i]))
2195 ++disorders;
2198 return disorders;
2201 /* Merge NFILES FILES onto OFP. */
2203 static void
2204 merge (char **files, int nfiles, FILE *ofp)
2206 int i, j, t;
2207 char *temp;
2208 FILE *fps[NMERGE], *tfp;
2210 while (nfiles > NMERGE)
2212 t = 0;
2213 for (i = 0; i < nfiles / NMERGE; ++i)
2215 for (j = 0; j < NMERGE; ++j)
2216 fps[j] = xfopen (files[i * NMERGE + j], "r");
2217 tfp = xtmpfopen (temp = tempname ());
2218 mergefps (fps, NMERGE, tfp);
2219 xfclose (tfp);
2220 for (j = 0; j < NMERGE; ++j)
2221 zaptemp (files[i * NMERGE + j]);
2222 files[t++] = temp;
2224 for (j = 0; j < nfiles % NMERGE; ++j)
2225 fps[j] = xfopen (files[i * NMERGE + j], "r");
2226 tfp = xtmpfopen (temp = tempname ());
2227 mergefps (fps, nfiles % NMERGE, tfp);
2228 xfclose (tfp);
2229 for (j = 0; j < nfiles % NMERGE; ++j)
2230 zaptemp (files[i * NMERGE + j]);
2231 files[t++] = temp;
2232 nfiles = t;
2235 for (i = 0; i < nfiles; ++i)
2236 fps[i] = xfopen (files[i], "r");
2237 mergefps (fps, i, ofp);
2238 for (i = 0; i < nfiles; ++i)
2239 zaptemp (files[i]);
2242 /* Sort NFILES FILES onto OFP. */
2244 static void
2245 sort (char **files, int nfiles, FILE *ofp)
2247 struct buffer buf;
2248 struct lines lines;
2249 struct line *tmp;
2250 int i, ntmp;
2251 FILE *fp, *tfp;
2252 struct tempnode *node;
2253 int n_temp_files = 0;
2254 char **tempfiles;
2256 initbuf (&buf, sortalloc);
2257 initlines (&lines, sortalloc / linelength + 1,
2258 LINEALLOC / sizeof (struct line));
2259 ntmp = lines.alloc;
2260 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
2262 while (nfiles--)
2264 fp = xfopen (*files++, "r");
2265 while (fillbuf (&buf, fp))
2267 findlines (&buf, &lines);
2268 if (lines.used > ntmp)
2270 while (lines.used > ntmp)
2271 ntmp *= 2;
2272 tmp = (struct line *)
2273 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
2275 #ifdef ENABLE_NLS
2276 if (nls_keyhead)
2277 nls_keyhead = nls_keyhead->next;
2278 if (!nls_fraction_found && nls_keyhead)
2279 nls_numeric_format (lines.lines, lines.used);
2280 #endif
2281 sortlines (lines.lines, lines.used, tmp);
2282 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
2284 tfp = ofp;
2286 else
2288 ++n_temp_files;
2289 tfp = xtmpfopen (tempname ());
2291 for (i = 0; i < lines.used; ++i)
2292 if (!unique || i == 0
2293 || compare (&lines.lines[i], &lines.lines[i - 1]))
2295 write_bytes (lines.lines[i].text, lines.lines[i].length, tfp);
2296 putc (eolchar, tfp);
2298 if (tfp != ofp)
2299 xfclose (tfp);
2301 xfclose (fp);
2304 free (buf.buf);
2305 free ((char *) lines.lines);
2306 free ((char *) tmp);
2308 if (n_temp_files)
2310 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
2311 i = n_temp_files;
2312 for (node = temphead.next; i > 0; node = node->next)
2313 tempfiles[--i] = node->name;
2314 merge (tempfiles, n_temp_files, ofp);
2315 free ((char *) tempfiles);
2319 /* Insert key KEY at the end of the list (`keyhead'). */
2321 static void
2322 insertkey (struct keyfield *key)
2324 struct keyfield *k = &keyhead;
2326 while (k->next)
2327 k = k->next;
2328 k->next = key;
2329 key->next = NULL;
2330 #ifdef ENABLE_NLS
2331 if (key->numeric || key->general_numeric)
2333 struct nls_keyfield *nk;
2335 nk = (struct nls_keyfield *) xmalloc (sizeof (struct nls_keyfield));
2336 nk->key = key;
2337 if (nls_keyhead)
2339 nk->next = nls_keyhead->next;
2340 nls_keyhead->next = nk;
2342 else
2343 nk->next = nk;
2344 nls_keyhead = nk;
2346 #endif
2349 static void
2350 badfieldspec (const char *s)
2352 error (SORT_FAILURE, 0, _("invalid field specification `%s'"), s);
2355 /* Handle interrupts and hangups. */
2357 static void
2358 sighandler (int sig)
2360 #ifdef SA_INTERRUPT
2361 struct sigaction sigact;
2363 sigact.sa_handler = SIG_DFL;
2364 sigemptyset (&sigact.sa_mask);
2365 sigact.sa_flags = 0;
2366 sigaction (sig, &sigact, NULL);
2367 #else /* !SA_INTERRUPT */
2368 signal (sig, SIG_DFL);
2369 #endif /* SA_INTERRUPT */
2370 cleanup ();
2371 kill (getpid (), sig);
2374 /* Set the ordering options for KEY specified in S.
2375 Return the address of the first character in S that
2376 is not a valid ordering option.
2377 BLANKTYPE is the kind of blanks that 'b' should skip. */
2379 static char *
2380 set_ordering (register const char *s, struct keyfield *key,
2381 enum blanktype blanktype)
2383 while (*s)
2385 switch (*s)
2387 case 'b':
2388 if (blanktype == bl_start || blanktype == bl_both)
2389 key->skipsblanks = 1;
2390 if (blanktype == bl_end || blanktype == bl_both)
2391 key->skipeblanks = 1;
2392 break;
2393 case 'd':
2394 key->ignore = nondictionary;
2395 break;
2396 case 'f':
2397 key->translate = fold_toupper;
2398 break;
2399 case 'g':
2400 key->general_numeric = 1;
2401 break;
2402 case 'i':
2403 key->ignore = nonprinting;
2404 break;
2405 case 'M':
2406 key->month = 1;
2407 break;
2408 case 'n':
2409 key->numeric = 1;
2410 break;
2411 case 'r':
2412 key->reverse = 1;
2413 break;
2414 default:
2415 return (char *) s;
2417 ++s;
2419 return (char *) s;
2422 static void
2423 key_init (struct keyfield *key)
2425 memset (key, 0, sizeof (*key));
2426 key->eword = -1;
2429 /* strdup and return the result of setlocale, but guard against a NULL
2430 return value. If setlocale returns NULL, strdup FAIL_VAL instead. */
2432 #if defined ENABLE_NLS && ( !defined __GLIBC__ || __GLIBC__ < 2 )
2433 static inline char *
2434 my_setlocale (const char *locale, const char *fail_val)
2436 char *s = setlocale (LC_ALL, locale);
2437 if (s == NULL)
2438 s = (char *) fail_val;
2439 return xstrdup (s);
2441 #endif
2444 main (int argc, char **argv)
2446 struct keyfield *key = NULL, gkey;
2447 char *s;
2448 int i, t, t2;
2449 int checkonly = 0, mergeonly = 0, nfiles = 0;
2450 char *minus = "-", *outfile = minus, **files, *tmp;
2451 FILE *ofp;
2452 #ifdef SA_INTERRUPT
2453 struct sigaction oldact, newact;
2454 #endif /* SA_INTERRUPT */
2456 program_name = argv[0];
2458 #ifdef ENABLE_NLS
2460 /* Determine whether the current locale is C or POSIX. */
2461 # if defined __GLIBC__ && __GLIBC__ >= 2
2462 s = setlocale (LC_ALL, "");
2463 if (s != NULL && !STREQ (s, "C") && !STREQ (s, "POSIX"))
2464 /* The current locale is neither C nor POSIX. We'll need to do
2465 more work. */
2466 need_locale = 1;
2467 # else
2469 char *c_locale_string = my_setlocale ("C", "");
2470 char *posix_locale_string = my_setlocale ("POSIX", "");
2471 char *current_locale_string = setlocale (LC_ALL, "");
2473 if (current_locale_string != NULL
2474 && !STREQ (current_locale_string, c_locale_string)
2475 && !STREQ (current_locale_string, posix_locale_string))
2476 /* The current locale is neither C nor POSIX.
2477 We'll need to do more work. */
2478 need_locale = 1;
2480 free (c_locale_string);
2481 free (posix_locale_string);
2483 # endif
2485 /* Let's get locale's representation of the decimal point */
2487 struct lconv *lconvp = localeconv ();
2489 decimal_point = *lconvp->decimal_point;
2490 th_sep = *lconvp->thousands_sep;
2491 nls_grouping = (char *) (lconvp->grouping);
2494 /* if locale doesn't define a decimal point, we'll use the
2495 US notation. */
2496 if (decimal_point == '\0')
2497 decimal_point = FLOATING_POINT;
2498 else
2499 nls_fraction_found = 0; /* Figure out which decimal point to use */
2501 nls_month_found = 0; /* Figure out which month notation to use */
2503 monthtab = nls_monthtab;
2505 #endif /* NLS */
2507 bindtextdomain (PACKAGE, LOCALEDIR);
2508 textdomain (PACKAGE);
2510 parse_long_options (argc, argv, "sort", GNU_PACKAGE, VERSION, usage);
2512 have_read_stdin = 0;
2513 inittables ();
2515 temp_file_prefix = getenv ("TMPDIR");
2516 if (temp_file_prefix == NULL)
2517 temp_file_prefix = DEFAULT_TMPDIR;
2519 /* Change the way xmalloc and xrealloc fail. */
2520 xalloc_exit_failure = SORT_FAILURE;
2521 xalloc_fail_func = cleanup;
2523 #ifdef SA_INTERRUPT
2524 newact.sa_handler = sighandler;
2525 sigemptyset (&newact.sa_mask);
2526 newact.sa_flags = 0;
2528 sigaction (SIGINT, NULL, &oldact);
2529 if (oldact.sa_handler != SIG_IGN)
2530 sigaction (SIGINT, &newact, NULL);
2531 sigaction (SIGHUP, NULL, &oldact);
2532 if (oldact.sa_handler != SIG_IGN)
2533 sigaction (SIGHUP, &newact, NULL);
2534 sigaction (SIGPIPE, NULL, &oldact);
2535 if (oldact.sa_handler != SIG_IGN)
2536 sigaction (SIGPIPE, &newact, NULL);
2537 sigaction (SIGTERM, NULL, &oldact);
2538 if (oldact.sa_handler != SIG_IGN)
2539 sigaction (SIGTERM, &newact, NULL);
2540 #else /* !SA_INTERRUPT */
2541 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
2542 signal (SIGINT, sighandler);
2543 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
2544 signal (SIGHUP, sighandler);
2545 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
2546 signal (SIGPIPE, sighandler);
2547 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
2548 signal (SIGTERM, sighandler);
2549 #endif /* !SA_INTERRUPT */
2551 gkey.sword = gkey.eword = -1;
2552 gkey.ignore = NULL;
2553 gkey.translate = NULL;
2554 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
2555 gkey.skipsblanks = gkey.skipeblanks = 0;
2557 files = (char **) xmalloc (sizeof (char *) * argc);
2559 for (i = 1; i < argc; ++i)
2561 if (argv[i][0] == '+')
2563 if (key)
2564 insertkey (key);
2565 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
2566 key_init (key);
2567 s = argv[i] + 1;
2568 if (! (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1]))))
2569 badfieldspec (argv[i]);
2570 for (t = 0; ISDIGIT (*s); ++s)
2571 t = 10 * t + *s - '0';
2572 t2 = 0;
2573 if (*s == '.')
2574 for (++s; ISDIGIT (*s); ++s)
2575 t2 = 10 * t2 + *s - '0';
2576 if (t2 || t)
2578 key->sword = t;
2579 key->schar = t2;
2581 else
2582 key->sword = -1;
2583 s = set_ordering (s, key, bl_start);
2584 if (*s)
2585 badfieldspec (argv[i]);
2587 else if (argv[i][0] == '-' && argv[i][1])
2589 s = argv[i] + 1;
2590 if (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1])))
2592 if (!key)
2594 /* Provoke with `sort -9'. */
2595 error (0, 0, _("when using the old-style +POS and -POS \
2596 key specifiers,\nthe +POS specifier must come first"));
2597 usage (SORT_FAILURE);
2599 for (t = 0; ISDIGIT (*s); ++s)
2600 t = t * 10 + *s - '0';
2601 t2 = 0;
2602 if (*s == '.')
2603 for (++s; ISDIGIT (*s); ++s)
2604 t2 = t2 * 10 + *s - '0';
2605 key->eword = t;
2606 key->echar = t2;
2607 s = set_ordering (s, key, bl_end);
2608 if (*s)
2609 badfieldspec (argv[i]);
2610 insertkey (key);
2611 key = NULL;
2613 else
2614 while (*s)
2616 s = set_ordering (s, &gkey, bl_both);
2617 switch (*s)
2619 case '\0':
2620 break;
2621 case 'c':
2622 checkonly = 1;
2623 break;
2624 case 'k':
2625 if (s[1])
2626 ++s;
2627 else
2629 if (i == argc - 1)
2630 error (SORT_FAILURE, 0,
2631 _("option `-k' requires an argument"));
2632 else
2633 s = argv[++i];
2635 if (key)
2636 insertkey (key);
2637 key = (struct keyfield *)
2638 xmalloc (sizeof (struct keyfield));
2639 key_init (key);
2640 /* Get POS1. */
2641 if (!ISDIGIT (*s))
2642 badfieldspec (argv[i]);
2643 for (t = 0; ISDIGIT (*s); ++s)
2644 t = 10 * t + *s - '0';
2645 if (t == 0)
2647 /* Provoke with `sort -k0' */
2648 error (0, 0, _("the starting field number argument \
2649 to the `-k' option must be positive"));
2650 badfieldspec (argv[i]);
2652 --t;
2653 t2 = 0;
2654 if (*s == '.')
2656 if (!ISDIGIT (s[1]))
2658 /* Provoke with `sort -k1.' */
2659 error (0, 0, _("starting field spec has `.' but \
2660 lacks following character offset"));
2661 badfieldspec (argv[i]);
2663 for (++s; ISDIGIT (*s); ++s)
2664 t2 = 10 * t2 + *s - '0';
2665 if (t2 == 0)
2667 /* Provoke with `sort -k1.0' */
2668 error (0, 0, _("starting field character offset \
2669 argument to the `-k' option\nmust be positive"));
2670 badfieldspec (argv[i]);
2672 --t2;
2674 if (t2 || t)
2676 key->sword = t;
2677 key->schar = t2;
2679 else
2680 key->sword = -1;
2681 s = set_ordering (s, key, bl_start);
2682 if (*s == 0)
2684 key->eword = -1;
2685 key->echar = 0;
2687 else if (*s != ',')
2688 badfieldspec (argv[i]);
2689 else if (*s == ',')
2691 /* Skip over comma. */
2692 ++s;
2693 if (*s == 0)
2695 /* Provoke with `sort -k1,' */
2696 error (0, 0, _("field specification has `,' but \
2697 lacks following field spec"));
2698 badfieldspec (argv[i]);
2700 /* Get POS2. */
2701 for (t = 0; ISDIGIT (*s); ++s)
2702 t = t * 10 + *s - '0';
2703 if (t == 0)
2705 /* Provoke with `sort -k1,0' */
2706 error (0, 0, _("ending field number argument \
2707 to the `-k' option must be positive"));
2708 badfieldspec (argv[i]);
2710 --t;
2711 t2 = 0;
2712 if (*s == '.')
2714 if (!ISDIGIT (s[1]))
2716 /* Provoke with `sort -k1,1.' */
2717 error (0, 0, _("ending field spec has `.' \
2718 but lacks following character offset"));
2719 badfieldspec (argv[i]);
2721 for (++s; ISDIGIT (*s); ++s)
2722 t2 = t2 * 10 + *s - '0';
2724 else
2726 /* `-k 2,3' is equivalent to `+1 -3'. */
2727 ++t;
2729 key->eword = t;
2730 key->echar = t2;
2731 s = set_ordering (s, key, bl_end);
2732 if (*s)
2733 badfieldspec (argv[i]);
2735 insertkey (key);
2736 key = NULL;
2737 goto outer;
2738 case 'm':
2739 mergeonly = 1;
2740 break;
2741 case 'o':
2742 if (s[1])
2743 outfile = s + 1;
2744 else
2746 if (i == argc - 1)
2747 error (SORT_FAILURE, 0,
2748 _("option `-o' requires an argument"));
2749 else
2750 outfile = argv[++i];
2752 goto outer;
2753 case 's':
2754 stable = 1;
2755 break;
2756 case 't':
2757 if (s[1])
2758 tab = *++s;
2759 else if (i < argc - 1)
2761 tab = *argv[++i];
2762 goto outer;
2764 else
2765 error (SORT_FAILURE, 0,
2766 _("option `-t' requires an argument"));
2767 break;
2768 case 'T':
2769 if (s[1])
2770 temp_file_prefix = ++s;
2771 else
2773 if (i < argc - 1)
2774 temp_file_prefix = argv[++i];
2775 else
2776 error (SORT_FAILURE, 0,
2777 _("option `-T' requires an argument"));
2779 goto outer;
2780 /* break; */
2781 case 'u':
2782 unique = 1;
2783 break;
2784 case 'z':
2785 eolchar = 0;
2786 break;
2787 case 'y':
2788 /* Accept and ignore e.g. -y0 for compatibility with
2789 Solaris 2. */
2790 goto outer;
2791 default:
2792 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2793 argv[0], *s);
2794 usage (SORT_FAILURE);
2796 if (*s)
2797 ++s;
2800 else /* Not an option. */
2802 files[nfiles++] = argv[i];
2804 outer:;
2807 if (key)
2808 insertkey (key);
2810 /* Inheritance of global options to individual keys. */
2811 for (key = keyhead.next; key; key = key->next)
2812 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2813 && !key->skipeblanks && !key->month && !key->numeric
2814 && !key->general_numeric)
2816 key->ignore = gkey.ignore;
2817 key->translate = gkey.translate;
2818 key->skipsblanks = gkey.skipsblanks;
2819 key->skipeblanks = gkey.skipeblanks;
2820 key->month = gkey.month;
2821 key->numeric = gkey.numeric;
2822 key->general_numeric = gkey.general_numeric;
2823 key->reverse = gkey.reverse;
2826 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2827 || gkey.skipeblanks || gkey.month || gkey.numeric
2828 || gkey.general_numeric))
2829 insertkey (&gkey);
2830 reverse = gkey.reverse;
2832 if (nfiles == 0)
2834 nfiles = 1;
2835 files = &minus;
2838 if (checkonly)
2840 /* POSIX requires that sort return 1 IFF invoked with -c and the
2841 input is not properly sorted. */
2842 exit (check (files, nfiles) == 0 ? 0 : 1);
2845 if (!STREQ (outfile, "-"))
2847 struct stat outstat;
2848 if (stat (outfile, &outstat) == 0)
2850 /* The following code prevents a race condition when
2851 people use the brain dead shell programming idiom:
2852 cat file | sort -o file
2853 This feature is provided for historical compatibility,
2854 but we strongly discourage ever relying on this in
2855 new shell programs. */
2857 /* Temporarily copy each input file that might be another name
2858 for the output file. When in doubt (e.g. a pipe), copy. */
2859 for (i = 0; i < nfiles; ++i)
2861 char buf[8192];
2862 FILE *in_fp;
2863 FILE *out_fp;
2864 int cc;
2866 if (S_ISREG (outstat.st_mode) && !STREQ (outfile, files[i]))
2868 struct stat instat;
2869 if ((STREQ (files[i], "-")
2870 ? fstat (STDIN_FILENO, &instat)
2871 : stat (files[i], &instat)) != 0)
2873 error (0, errno, "%s", files[i]);
2874 cleanup ();
2875 exit (SORT_FAILURE);
2877 if (S_ISREG (instat.st_mode) && !SAME_INODE (instat, outstat))
2879 /* We know the files are distinct. */
2880 continue;
2884 in_fp = xfopen (files[i], "r");
2885 tmp = tempname ();
2886 out_fp = xtmpfopen (tmp);
2887 /* FIXME: maybe use copy.c(copy) here. */
2888 while ((cc = fread (buf, 1, sizeof buf, in_fp)) > 0)
2889 write_bytes (buf, cc, out_fp);
2890 if (ferror (in_fp))
2892 error (0, errno, "%s", files[i]);
2893 cleanup ();
2894 exit (SORT_FAILURE);
2896 xfclose (out_fp);
2897 xfclose (in_fp);
2898 files[i] = tmp;
2900 ofp = xfopen (outfile, "w");
2902 else
2904 /* A non-`-' outfile was specified, but the file doesn't yet exist.
2905 Before opening it for writing (thus creating it), make sure all
2906 of the input files exist. Otherwise, creating the output file
2907 could create an otherwise missing input file, making sort succeed
2908 when it should fail. */
2909 for (i = 0; i < nfiles; ++i)
2911 struct stat sb;
2912 if (STREQ (files[i], "-"))
2913 continue;
2914 if (stat (files[i], &sb))
2916 error (0, errno, "%s", files[i]);
2917 cleanup ();
2918 exit (SORT_FAILURE);
2922 ofp = xfopen (outfile, "w");
2925 else
2927 ofp = stdout;
2930 if (mergeonly)
2931 merge (files, nfiles, ofp);
2932 else
2933 sort (files, nfiles, ofp);
2934 cleanup ();
2936 /* If we wait for the implicit flush on exit, and the parent process
2937 has closed stdout (e.g., exec >&- in a shell), then the output file
2938 winds up empty. I don't understand why. This is under SunOS,
2939 Solaris, Ultrix, and Irix. This premature fflush makes the output
2940 reappear. --karl@cs.umb.edu */
2941 if (fflush (ofp) < 0)
2942 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2944 if (have_read_stdin && fclose (stdin) == EOF)
2945 error (SORT_FAILURE, errno, outfile);
2946 if (ferror (stdout) || fclose (stdout) == EOF)
2947 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2949 exit (EXIT_SUCCESS);