*** empty log message ***
[coreutils.git] / src / sort.c
bloba0fb854c2178d0c1db01298a39aa0009fb500050
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 1991-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
24 #include <config.h>
26 #include <sys/types.h>
27 #include <signal.h>
28 #include <stdio.h>
29 #include <assert.h>
30 #include "system.h"
31 #include "long-options.h"
32 #include "error.h"
33 #include "xstrtod.h"
34 #include "xalloc.h"
36 /* The official name of this program (e.g., no `g' prefix). */
37 #define PROGRAM_NAME "sort"
39 #define AUTHORS "Mike Haertel"
41 #if defined ENABLE_NLS && HAVE_LANGINFO_H
42 # include <langinfo.h>
43 #endif
45 #if HAVE_PATHCONF && defined _PC_NAME_MAX
46 # define NAME_MAX_IN_DIR(Dir) pathconf (Dir, _PC_NAME_MAX)
47 #else
48 # define NAME_MAX_IN_DIR(Dir) 255
49 #endif
51 char *xstrdup ();
53 /* Undefine, to avoid warning about redefinition on some systems. */
54 #undef min
55 #define min(a, b) ((a) < (b) ? (a) : (b))
56 #undef max
57 #define max(a, b) ((a) > (b) ? (a) : (b))
59 #define UCHAR_LIM (UCHAR_MAX + 1)
60 #define UCHAR(c) ((unsigned char) (c))
62 #ifndef DEFAULT_TMPDIR
63 # define DEFAULT_TMPDIR "/tmp"
64 #endif
66 /* Use this as exit status in case of error, not EXIT_FAILURE. This
67 is necessary because EXIT_FAILURE is usually 1 and POSIX requires
68 that sort exit with status 1 IFF invoked with -c and the input is
69 not properly sorted. Any other irregular exit must exit with a
70 status code greater than 1. */
71 #define SORT_FAILURE 2
73 #define FLOATING_POINT '.'
74 #define FLOATING_COMMA ','
75 #define NEGATION_SIGN '-'
76 #define NUMERIC_ZERO '0'
78 #ifdef ENABLE_NLS
79 # define NLS_MEMCMP(S1, S2, Len) strncoll (S1, S2, Len)
80 # define NLS_STRNCMP(S1, S2, Len) strncoll_s2_readonly (S1, S2, Len)
81 #else
82 # define NLS_MEMCMP(S1, S2, Len) memcmp (S1, S2, Len)
83 # define NLS_STRNCMP(S1, S2, Len) strncmp (S1, S2, Len)
84 #endif
86 #ifdef ENABLE_NLS
88 static unsigned char decimal_point;
89 static unsigned char th_sep;
90 static char *nls_grouping;
92 /* This is "C" locale, need another? */
93 static int need_locale = 0;
95 /* Should we look for decimal point? */
96 static int nls_fraction_found = 1;
98 /* Look for month notations in text? */
99 static int nls_month_found = 1;
101 #else
103 # define decimal_point FLOATING_POINT
105 #endif
107 /* If native language support is requested, make a 1-1 map to the
108 locale character map, otherwise ensure normal behavior. */
109 #ifdef ENABLE_NLS
111 /* 12 months in a year */
112 # define NLS_NUM_MONTHS 12
114 /* Maximum number of elements, to allocate per allocation unit */
115 # define NLS_MAX_GROUPS 8
117 /* A string with one character, to enforce char collation */
118 # define NLS_ONE_CHARACTER_STRING " "
120 #endif
122 /* The kind of blanks for '-b' to skip in various options. */
123 enum blanktype { bl_start, bl_end, bl_both };
125 /* The character marking end of line. Default to \n. */
126 int eolchar = '\n';
128 /* Lines are held in core as counted strings. */
129 struct line
131 char *text; /* Text of the line. */
132 int length; /* Length not including final newline. */
133 char *keybeg; /* Start of first key. */
134 char *keylim; /* Limit of first key. */
137 /* Arrays of lines. */
138 struct lines
140 struct line *lines; /* Dynamically allocated array of lines. */
141 int used; /* Number of slots used. */
142 int alloc; /* Number of slots allocated. */
143 int limit; /* Max number of slots to allocate. */
146 /* Input buffers. */
147 struct buffer
149 char *buf; /* Dynamically allocated buffer. */
150 int used; /* Number of bytes used. */
151 int alloc; /* Number of bytes allocated. */
152 int left; /* Number of bytes left after line parsing. */
155 struct keyfield
157 int sword; /* Zero-origin 'word' to start at. */
158 int schar; /* Additional characters to skip. */
159 int skipsblanks; /* Skip leading white space at start. */
160 int eword; /* Zero-origin first word after field. */
161 int echar; /* Additional characters in field. */
162 int skipeblanks; /* Skip trailing white space at finish. */
163 int *ignore; /* Boolean array of characters to ignore. */
164 char *translate; /* Translation applied to characters. */
165 int numeric; /* Flag for numeric comparison. Handle
166 strings of digits with optional decimal
167 point, but no exponential notation. */
168 int general_numeric; /* Flag for general, numeric comparison.
169 Handle numbers in exponential notation. */
170 int month; /* Flag for comparison by month name. */
171 int reverse; /* Reverse the sense of comparison. */
172 struct keyfield *next; /* Next keyfield to try. */
175 struct month
177 char *name;
178 int val;
181 /* The name this program was run with. */
182 char *program_name;
184 /* Table of white space. */
185 static int blanks[UCHAR_LIM];
187 /* Table of non-printing characters. */
188 static int nonprinting[UCHAR_LIM];
190 /* Table of non-dictionary characters (not letters, digits, or blanks). */
191 static int nondictionary[UCHAR_LIM];
193 /* Translation table folding lower case to upper. */
194 static char fold_toupper[UCHAR_LIM];
196 /* Table mapping 3-letter month names to integers.
197 Alphabetic order allows binary search. */
198 static const struct month us_monthtab[] =
200 {"APR", 4},
201 {"AUG", 8},
202 {"DEC", 12},
203 {"FEB", 2},
204 {"JAN", 1},
205 {"JUL", 7},
206 {"JUN", 6},
207 {"MAR", 3},
208 {"MAY", 5},
209 {"NOV", 11},
210 {"OCT", 10},
211 {"SEP", 9}
214 #ifdef ENABLE_NLS
216 /* Locale may have a different idea of month names */
217 static struct month nls_monthtab[NLS_NUM_MONTHS];
218 static int nls_months_collide[NLS_NUM_MONTHS + 1];
220 /* Numeric keys, to search for numeric format */
221 struct nls_keyfield
223 struct keyfield *key;
224 struct nls_keyfield *next;
227 static struct nls_keyfield *nls_keyhead = NULL;
229 #endif
231 /* Which month table to use in the program, default C */
232 static const struct month *monthtab = us_monthtab;
234 /* During the merge phase, the number of files to merge at once. */
235 #define NMERGE 16
237 /* Initial buffer size for in core sorting. Will not grow unless a
238 line longer than this is seen. */
239 static int sortalloc = 512 * 1024;
241 /* Initial buffer size for in core merge buffers. Bear in mind that
242 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
243 static int mergealloc = 16 * 1024;
245 /* Guess of average line length. */
246 static int linelength = 30;
248 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
249 #define LINEALLOC (256 * 1024)
251 /* Directory in which any temporary files are to be created. */
252 static char *temp_dir;
254 /* Flag to reverse the order of all comparisons. */
255 static int reverse;
257 /* Flag for stable sort. This turns off the last ditch bytewise
258 comparison of lines, and instead leaves lines in the same order
259 they were read if all keys compare equal. */
260 static int stable;
262 /* Tab character separating fields. If NUL, then fields are separated
263 by the empty string between a non-whitespace character and a whitespace
264 character. */
265 static char tab;
267 /* Flag to remove consecutive duplicate lines from the output.
268 Only the last of a sequence of equal lines will be output. */
269 static int unique;
271 /* Nonzero if any of the input files are the standard input. */
272 static int have_read_stdin;
274 /* Lists of key field comparisons to be tried. */
275 static struct keyfield keyhead;
277 void
278 usage (int status)
280 if (status != 0)
281 fprintf (stderr, _("Try `%s --help' for more information.\n"),
282 program_name);
283 else
285 printf (_("\
286 Usage: %s [OPTION]... [FILE]...\n\
288 program_name);
289 printf (_("\
290 Write sorted concatenation of all FILE(s) to standard output.\n\
292 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
293 -b ignore leading blanks in sort fields or keys\n\
294 -c check if given files already sorted, do not sort\n\
295 -d consider only [a-zA-Z0-9 ] characters in keys\n\
296 -f fold lower case to upper case characters in keys\n\
297 -g compare according to general numerical value, imply -b\n\
298 -i consider only [\\040-\\0176] characters in keys\n\
299 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
300 -m merge already sorted files, do not sort\n\
301 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
302 -n compare according to string numerical value, imply -b\n\
303 -o FILE write result on FILE instead of standard output\n\
304 -r reverse the result of comparisons\n\
305 -s stabilize sort by disabling last resort comparison\n\
306 -t SEP use SEParator instead of non- to whitespace transition\n\
307 -T DIRECTORY use DIRECTORY for temporary files, not $TMPDIR or %s\n\
308 -u with -c, check for strict ordering;\n\
309 with -m, only output the first of an equal sequence\n\
310 -z end lines with 0 byte, not newline, for find -print0\n\
311 --help display this help and exit\n\
312 --version output version information and exit\n\
314 POS is F[.C][OPTS], where F is the field number and C the character\n\
315 position in the field, both counted from zero. OPTS is made up of one\n\
316 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
317 for that key. If no key given, use the entire line as key. With no\n\
318 FILE, or when FILE is -, read standard input.\n\
320 , DEFAULT_TMPDIR);
321 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
323 /* Don't use EXIT_FAILURE here in case it is defined to be 1.
324 POSIX requires that sort return 1 IFF invoked with -c and
325 the input is not properly sorted. */
326 assert (status == 0 || status == SORT_FAILURE);
327 exit (status);
330 /* The list of temporary files. */
331 static struct tempnode
333 char *name;
334 struct tempnode *next;
335 } temphead;
337 /* Clean up any remaining temporary files. */
339 static void
340 cleanup (void)
342 struct tempnode *node;
344 for (node = temphead.next; node; node = node->next)
345 unlink (node->name);
348 static FILE *
349 xtmpfopen (const char *file)
351 FILE *fp;
352 int fd;
354 /* Open temporary file exclusively, to foil a common
355 denial-of-service attack. */
356 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0600);
357 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
359 error (0, errno, "%s", file);
360 cleanup ();
361 exit (SORT_FAILURE);
364 return fp;
367 static FILE *
368 xfopen (const char *file, const char *how)
370 FILE *fp;
372 if (STREQ (file, "-"))
374 fp = stdin;
376 else
378 if ((fp = fopen (file, how)) == NULL)
380 error (0, errno, "%s", file);
381 cleanup ();
382 exit (SORT_FAILURE);
386 if (fp == stdin)
387 have_read_stdin = 1;
388 return fp;
391 static void
392 xfclose (FILE *fp)
394 if (fp == stdin)
396 /* Allow reading stdin from tty more than once. */
397 if (feof (fp))
398 clearerr (fp);
400 else if (fp == stdout)
402 if (fflush (fp) != 0)
404 error (0, errno, _("flushing file"));
405 cleanup ();
406 exit (SORT_FAILURE);
409 else
411 if (fclose (fp) != 0)
413 error (0, errno, _("error closing file"));
414 cleanup ();
415 exit (SORT_FAILURE);
420 static void
421 write_bytes (const char *buf, size_t n_bytes, FILE *fp)
423 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
425 error (0, errno, _("write error"));
426 cleanup ();
427 exit (SORT_FAILURE);
431 /* Return a name for a temporary file. */
433 static char *
434 tempname (void)
436 static unsigned int seq;
437 int len = strlen (temp_dir);
438 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
439 int long_file_names = NAME_MAX_IN_DIR (temp_dir) > 12;
440 struct tempnode *node;
442 /* If long filenames aren't supported, we cannot use filenames
443 longer than 8+3 and still assume they are unique. */
444 if (long_file_names)
445 sprintf (name,
446 "%s%ssort%5.5d%5.5d",
447 temp_dir,
448 (len && temp_dir[len - 1] != '/') ? "/" : "",
449 (unsigned int) getpid () & 0xffff, seq);
450 else
451 sprintf (name, "%s%ss%5.5d%2.2d.%3.3d",
452 temp_dir,
453 (len && temp_dir[len - 1] != '/') ? "/" : "",
454 (unsigned int) getpid () & 0xffff, seq / 1000, seq % 1000);
456 ++seq;
458 /* Make sure that SEQ's value fits in 5 digits if temp_dir is on
459 an 8.3 filesystem. */
460 if (!long_file_names && seq >= 100000)
461 seq = 0;
463 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
464 node->name = name;
465 node->next = temphead.next;
466 temphead.next = node;
467 return name;
470 /* Search through the list of temporary files for NAME;
471 remove it if it is found on the list. */
473 static void
474 zaptemp (const char *name)
476 struct tempnode *node, *temp;
478 for (node = &temphead; node->next; node = node->next)
479 if (STREQ (name, node->next->name))
480 break;
481 if (node->next)
483 temp = node->next;
484 unlink (temp->name);
485 free (temp->name);
486 node->next = temp->next;
487 free ((char *) temp);
491 #ifdef ENABLE_NLS
492 /* Initialize the character class tables. */
494 static int
495 nls_sort_month_comp (const void *m1, const void *m2)
497 return strcoll (((const struct month *) m1)->name,
498 ((const struct month *) m2)->name);
501 /* Do collation on strings S1 and S2, but for at most L characters.
502 we use the fact, that we KNOW that LEN is the min of the two lengths */
503 static int
504 strncoll (char *s1, char *s2, int len)
506 register int diff;
508 if (need_locale)
510 /* Emulate a strncoll function, by forcing strcoll to compare
511 only the first LEN characters in each string. */
512 register unsigned char n1 = s1[len];
513 register unsigned char n2 = s2[len];
515 s1[len] = s2[len] = 0;
516 diff = strcoll (s1, s2);
517 s1[len] = n1;
518 s2[len] = n2;
520 else
522 diff = memcmp (s1, s2, len);
525 return diff;
528 /* Do collation on strings S1 and S2, but for at most L characters.
529 Use the fact, that we KNOW that S2 is the shorter string and has
530 length LEN. */
531 static int
532 strncoll_s2_readonly (char *s1, const char *s2, int len)
534 register int diff;
536 assert (len == strlen (s2));
537 assert (len <= strlen (s1));
539 if (need_locale)
541 /* Emulate a strncoll function, by forcing strcoll to compare
542 only the first LEN characters in each string. */
543 register unsigned char n1 = s1[len];
545 s1[len] = 0;
546 diff = strcoll (s1, s2);
547 s1[len] = n1;
549 else
551 diff = memcmp (s1, s2, len);
554 return diff;
557 #endif /* NLS */
559 static void
560 inittables (void)
562 int i;
564 for (i = 0; i < UCHAR_LIM; ++i)
566 if (ISBLANK (i))
567 blanks[i] = 1;
568 if (!ISPRINT (i))
569 nonprinting[i] = 1;
570 if (!ISALNUM (i) && !ISBLANK (i))
571 nondictionary[i] = 1;
572 if (ISLOWER (i))
573 fold_toupper[i] = toupper (i);
574 else
575 fold_toupper[i] = i;
578 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
579 /* If We're not in the "C" locale, read in different names for months. */
580 if (need_locale)
582 nls_months_collide[0] = 1; /* if an error, look again */
583 for (i = 0; i < NLS_NUM_MONTHS; i++)
585 char *s;
586 size_t s_len;
587 int j;
589 s = (char *) nl_langinfo (ABMON_1 + us_monthtab[i].val - 1);
590 s_len = strlen (s);
591 nls_monthtab[i].name = (char *) xmalloc (s_len + 1);
592 nls_monthtab[i].val = us_monthtab[i].val;
594 /* Be careful: abreviated month names
595 may be longer than the usual 3 characters. */
596 for (j = 0; j < s_len; j++)
597 nls_monthtab[i].name[j] = fold_toupper[UCHAR (s[j])];
598 nls_monthtab[i].name[j] = '\0';
600 nls_months_collide[nls_monthtab[i].val] = 0;
601 for (j = 0; j < NLS_NUM_MONTHS; ++j)
603 if (STREQ (nls_monthtab[i].name, us_monthtab[i].name))
605 /* There are indeed some month names in English which
606 collide with the NLS name. */
607 nls_months_collide[nls_monthtab[i].val] = 1;
608 break;
612 /* Now quicksort the month table (should be sorted already!).
613 However, another locale doesn't rule out the possibility
614 of a different order of month names. */
615 qsort ((void *) nls_monthtab, NLS_NUM_MONTHS,
616 sizeof (struct month), nls_sort_month_comp);
617 monthtab = nls_monthtab;
619 #endif /* NLS */
622 /* Initialize BUF, allocating ALLOC bytes initially. */
624 static void
625 initbuf (struct buffer *buf, int alloc)
627 buf->alloc = alloc;
628 buf->buf = xmalloc (buf->alloc);
629 buf->used = buf->left = 0;
632 /* Fill BUF reading from FP, moving buf->left bytes from the end
633 of buf->buf to the beginning first. If EOF is reached and the
634 file wasn't terminated by a newline, supply one. Return a count
635 of bytes buffered. */
637 static int
638 fillbuf (struct buffer *buf, FILE *fp)
640 int cc;
642 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
643 buf->used = buf->left;
645 while (!feof (fp) && (buf->used == 0
646 || !memchr (buf->buf, eolchar, buf->used)))
648 if (buf->used == buf->alloc)
650 buf->alloc *= 2;
651 buf->buf = xrealloc (buf->buf, buf->alloc);
653 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
654 if (ferror (fp))
656 error (0, errno, _("read error"));
657 cleanup ();
658 exit (SORT_FAILURE);
660 buf->used += cc;
663 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
665 if (buf->used == buf->alloc)
667 buf->alloc *= 2;
668 buf->buf = xrealloc (buf->buf, buf->alloc);
670 buf->buf[buf->used++] = eolchar;
673 return buf->used;
676 /* Initialize LINES, allocating space for ALLOC lines initially.
677 LIMIT is the maximum possible number of lines to allocate space
678 for, ever. */
680 static void
681 initlines (struct lines *lines, int alloc, int limit)
683 lines->alloc = alloc;
684 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
685 lines->used = 0;
686 lines->limit = limit;
689 /* Return a pointer to the first character of the field specified
690 by KEY in LINE. */
692 static char *
693 begfield (const struct line *line, const struct keyfield *key)
695 register char *ptr = line->text, *lim = ptr + line->length;
696 register int sword = key->sword, schar = key->schar;
698 if (tab)
699 while (ptr < lim && sword--)
701 while (ptr < lim && *ptr != tab)
702 ++ptr;
703 if (ptr < lim)
704 ++ptr;
706 else
707 while (ptr < lim && sword--)
709 while (ptr < lim && blanks[UCHAR (*ptr)])
710 ++ptr;
711 while (ptr < lim && !blanks[UCHAR (*ptr)])
712 ++ptr;
715 if (key->skipsblanks)
716 while (ptr < lim && blanks[UCHAR (*ptr)])
717 ++ptr;
719 if (ptr + schar <= lim)
720 ptr += schar;
721 else
722 ptr = lim;
724 return ptr;
727 /* Return the limit of (a pointer to the first character after) the field
728 in LINE specified by KEY. */
730 static char *
731 limfield (const struct line *line, const struct keyfield *key)
733 register char *ptr = line->text, *lim = ptr + line->length;
734 register int eword = key->eword, echar = key->echar;
736 /* Note: from the POSIX spec:
737 The leading field separator itself is included in
738 a field when -t is not used. FIXME: move this comment up... */
740 /* Move PTR past EWORD fields or to one past the last byte on LINE,
741 whichever comes first. If there are more than EWORD fields, leave
742 PTR pointing at the beginning of the field having zero-based index,
743 EWORD. If a delimiter character was specified (via -t), then that
744 `beginning' is the first character following the delimiting TAB.
745 Otherwise, leave PTR pointing at the first `blank' character after
746 the preceding field. */
747 if (tab)
748 while (ptr < lim && eword--)
750 while (ptr < lim && *ptr != tab)
751 ++ptr;
752 if (ptr < lim && (eword || echar > 0))
753 ++ptr;
755 else
756 while (ptr < lim && eword--)
758 while (ptr < lim && blanks[UCHAR (*ptr)])
759 ++ptr;
760 while (ptr < lim && !blanks[UCHAR (*ptr)])
761 ++ptr;
764 #ifdef POSIX_UNSPECIFIED
765 /* The following block of code makes GNU sort incompatible with
766 standard Unix sort, so it's ifdef'd out for now.
767 The POSIX spec isn't clear on how to interpret this.
768 FIXME: request clarification.
770 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
771 Date: Thu, 30 May 96 12:20:41 -0400
773 [...]I believe I've found another bug in `sort'.
775 $ cat /tmp/sort.in
776 a b c 2 d
777 pq rs 1 t
778 $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
779 a b c 2 d
780 pq rs 1 t
781 $ /bin/sort +0.6 -0.7 </tmp/sort.in
782 pq rs 1 t
783 a b c 2 d
785 Unix sort produced the answer I expected: sort on the single character
786 in column 6. GNU sort produced different results, because it disagrees
787 on the interpretation of the key-end spec "-M.N". Unix sort reads this
788 as "skip M fields, then N characters"; but GNU sort wants it to mean
789 "skip M fields, then either N characters or the rest of the current
790 field, whichever comes first". This extra clause applies only to
791 key-ends, not key-starts.
794 /* Make LIM point to the end of (one byte past) the current field. */
795 if (tab)
797 char *newlim;
798 newlim = memchr (ptr, tab, lim - ptr);
799 if (newlim)
800 lim = newlim;
802 else
804 char *newlim;
805 newlim = ptr;
806 while (newlim < lim && blanks[UCHAR (*newlim)])
807 ++newlim;
808 while (newlim < lim && !blanks[UCHAR (*newlim)])
809 ++newlim;
810 lim = newlim;
812 #endif
814 /* If we're skipping leading blanks, don't start counting characters
815 until after skipping past any leading blanks. */
816 if (key->skipsblanks)
817 while (ptr < lim && blanks[UCHAR (*ptr)])
818 ++ptr;
820 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
821 if (ptr + echar <= lim)
822 ptr += echar;
823 else
824 ptr = lim;
826 return ptr;
829 /* FIXME */
831 void
832 trim_trailing_blanks (const char *a_start, char **a_end)
834 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
835 --(*a_end);
838 /* Find the lines in BUF, storing pointers and lengths in LINES.
839 Also replace newlines in BUF with NULs. */
841 static void
842 findlines (struct buffer *buf, struct lines *lines)
844 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
845 struct keyfield *key = keyhead.next;
847 lines->used = 0;
849 while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))
850 && lines->used < lines->limit)
852 /* There are various places in the code that rely on a NUL
853 being at the end of in-core lines; NULs inside the lines
854 will not cause trouble, though. */
855 *ptr = '\0';
857 if (lines->used == lines->alloc)
859 lines->alloc *= 2;
860 lines->lines = (struct line *)
861 xrealloc ((char *) lines->lines,
862 lines->alloc * sizeof (struct line));
865 lines->lines[lines->used].text = beg;
866 lines->lines[lines->used].length = ptr - beg;
868 /* Precompute the position of the first key for efficiency. */
869 if (key)
871 if (key->eword >= 0)
872 lines->lines[lines->used].keylim =
873 limfield (&lines->lines[lines->used], key);
874 else
875 lines->lines[lines->used].keylim = ptr;
877 if (key->sword >= 0)
878 lines->lines[lines->used].keybeg =
879 begfield (&lines->lines[lines->used], key);
880 else
882 if (key->skipsblanks)
883 while (blanks[UCHAR (*beg)])
884 ++beg;
885 lines->lines[lines->used].keybeg = beg;
887 if (key->skipeblanks)
889 trim_trailing_blanks (lines->lines[lines->used].keybeg,
890 &lines->lines[lines->used].keylim);
893 else
895 lines->lines[lines->used].keybeg = 0;
896 lines->lines[lines->used].keylim = 0;
899 ++lines->used;
900 beg = ptr + 1;
903 buf->left = lim - beg;
906 /* Compare strings A and B containing decimal fractions < 1. Each string
907 should begin with a decimal point followed immediately by the digits
908 of the fraction. Strings not of this form are considered to be zero. */
910 /* The goal here, is to take two numbers a and b... compare these
911 in parallel. Instead of converting each, and then comparing the
912 outcome. Most likely stopping the comparison before the conversion
913 is complete. The algorithm used, in the old sort:
915 Algorithm: fraccompare
916 Action : compare two decimal fractions
917 accepts : char *a, char *b
918 returns : -1 if a<b, 0 if a=b, 1 if a>b.
919 implement:
921 if *a == decimal_point AND *b == decimal_point
922 find first character different in a and b.
923 if both are digits, return the difference *a - *b.
924 if *a is a digit
925 skip past zeroes
926 if digit return 1, else 0
927 if *b is a digit
928 skip past zeroes
929 if digit return -1, else 0
930 if *a is a decimal_point
931 skip past decimal_point and zeroes
932 if digit return 1, else 0
933 if *b is a decimal_point
934 skip past decimal_point and zeroes
935 if digit return -1, else 0
936 return 0
938 The above implementation duplicates code, and thus there is room
939 for improvement:
940 the difference in code of a and b, is solved by using a
941 reference to s, assigned to either a or b. and using diff
942 to denote return value.
943 the difference in either that start being a digit or
944 the decimal point, is solved by testing if either is
945 a decimal point, or if the other is a digit...
947 if *a or *b is a decimal_point
948 skip all chars where *a == *b
949 if *a and *b are digits
950 return *a - *b
951 if *a is a digit or *a is a decimal_point
952 s is a
953 diff is 1
954 else
955 s is b
956 diff is -1
957 skip decimal_point in s
958 skip zeroes in s
959 if *s is a digit
960 return diff
961 return 0 */
963 #define USE_NEW_FRAC_COMPARE
964 #ifdef USE_NEW_FRAC_COMPARE
966 static int
967 fraccompare (register const char *a, register const char *b)
969 # ifdef ENABLE_NLS
970 nls_fraction_found = 1;
971 # endif
973 if (*a == decimal_point || *b == decimal_point)
975 register const char *s;
976 int diff;
978 while (*a == *b)
980 ++a;
981 ++b;
982 if (!ISDIGIT (*a))
983 break;
986 if (ISDIGIT (*a) && ISDIGIT (*b))
987 return (*a) - (*b);
989 if (*a == decimal_point || (ISDIGIT (*a) && *b != decimal_point))
991 s = a;
992 diff = 1;
994 else
996 s = b;
997 diff = -1;
999 if (*s == decimal_point)
1000 ++s;
1001 while (*s == NUMERIC_ZERO)
1002 ++s;
1003 if (ISDIGIT (*s))
1004 return diff;
1006 return 0;
1009 #else
1010 static int
1011 fraccompare (register const char *a, register const char *b)
1013 register int tmpa = *a;
1014 register int tmpb = *b;
1016 if (tmpa == decimal_point && tmpb == decimal_point)
1019 tmpa = *++a, tmpb = *++b;
1020 while (tmpa == tmpb && ISDIGIT (tmpa));
1021 if (ISDIGIT (tmpa) && ISDIGIT (tmpb))
1022 return tmpa - tmpb;
1023 if (ISDIGIT (tmpa))
1025 while (tmpa == NUMERIC_ZERO)
1026 tmpa = *++a;
1027 if (ISDIGIT (tmpa))
1028 return 1;
1029 return 0;
1031 if (ISDIGIT (tmpb))
1033 while (tmpb == NUMERIC_ZERO)
1034 tmpb = *++b;
1035 if (ISDIGIT (tmpb))
1036 return -1;
1037 return 0;
1039 return 0;
1041 else if (tmpa == decimal_point)
1044 tmpa = *++a;
1045 while (tmpa == NUMERIC_ZERO);
1046 if (ISDIGIT (tmpa))
1047 return 1;
1048 return 0;
1050 else if (tmpb == decimal_point)
1053 tmpb = *++b;
1054 while (tmpb == NUMERIC_ZERO);
1055 if (ISDIGIT (tmpb))
1056 return -1;
1057 return 0;
1059 return 0;
1061 #endif
1063 /* Compare strings A and B as numbers without explicitly converting them to
1064 machine numbers. Comparatively slow for short strings, but asymptotically
1065 hideously fast. */
1067 /* The code here, is like the above... continuous reoccurrance of the
1068 same code... improved 15-JAN-1997 in connection with native languages
1069 support */
1071 #ifdef ENABLE_NLS
1073 /* Decide the kind of fraction the program will use */
1074 static void
1075 nls_set_fraction (register unsigned char ch)
1077 if (!nls_fraction_found && ch != decimal_point)
1079 if (ch == FLOATING_POINT)
1080 { /* US style */
1081 decimal_point = FLOATING_POINT;
1082 th_sep = FLOATING_COMMA;
1084 else if (ch == FLOATING_COMMA)
1085 { /* EU style */
1086 decimal_point = FLOATING_COMMA;
1087 th_sep = FLOATING_POINT;
1089 else if (ch != decimal_point)
1090 { /* Alien */
1091 decimal_point = ch;
1092 th_sep = '\0';
1095 nls_fraction_found = 1;
1098 /* Look for a fraction
1099 It isn't as simple as it looks... however, consider a number:
1100 1.234,00
1101 1,234.00
1102 It's easy to tell which is a decimal point, and which isn't. We use
1103 the grouping information to find out how many digits are grouped together
1104 for thousand separator.
1106 The idea here, is to use the grouping information... but not to
1107 spend time with verifying the groups... not too much time, anyway.
1108 so, a number represented to us as:
1109 1.234.567,89
1110 will be taken and separated into different groups, separated by a
1111 separator character (Decimal point or thousands separator).
1112 {1,234,567}
1113 these are the groups of digits that lead to a separator character,
1114 and with the trailing group is added:
1115 {1,234,567,89}
1116 resulting in 4 groups of numbers. If the resulting number of groups,
1117 are none, or just 1... this is not enough to decide anything about
1118 the decimal point. We need at least two for that. With two groups
1119 we have at least one separator. That separator can be a decimal
1120 point, or a thousands separator... if it is a thousands separator
1121 the number of digits in the last group, will comply with the first
1122 rule in the grouping rule for numeric values. i.e.
1123 |{89}| = grouping[0]
1124 if so, and there are only two groups of numbers, the value cannot
1125 be determined. If there are three or more numbers, the separator
1126 separating the groups is checked. If these are the same, the
1127 character is determined to be a thousands separator. If they are
1128 not the same, the last separator is determined to be a decimal
1129 point. If checking the grouping rules, we find out that there
1130 are no grouping rules defined, either the grouping rules is NULL
1131 or the first grouping number is 0, then the locale format is used.
1133 We try to take an advantage of a special situation. If the trailing
1134 group, the one that normally should be the fractional part, turns
1135 out to have the same length as the thousands separator rule says,
1136 making a doubt on that it may be a decimal point, we look for the
1137 group before that, i.e. with a two group form:
1138 {1234,567}
1139 where the grouping rule is 3;3... we take a look at group 1, and find
1140 out that |{1234}| > larger of the two first grouping rules, then
1141 the separator has to be a decimal point...
1144 static void
1145 look_for_fraction (const char *s, const char *e)
1147 register const char *p;
1148 register unsigned short n = 0;
1149 static unsigned short max_groups = 0;
1150 static unsigned short *groups = NULL;
1152 if (groups == NULL)
1154 max_groups = NLS_MAX_GROUPS;
1155 groups = (unsigned short *) xmalloc (sizeof (*groups) * max_groups);
1158 /* skip blanks and signs */
1159 while (blanks[UCHAR (*s)] || *s == NEGATION_SIGN)
1160 s++;
1161 /* groups = {}, n = 0 */
1162 for (p = s; p < e; p++)
1164 /* groups[n]={number of digits leading to separator n}
1165 n = number of separators so far */
1166 if (*p == decimal_point || *p == th_sep || *p == FLOATING_POINT)
1168 if (++n >= max_groups)
1170 /* BIG Number... enlarge table */
1171 max_groups += NLS_MAX_GROUPS;
1172 groups = (unsigned short *) xrealloc ((char *) groups,
1173 (sizeof (*groups)
1174 * max_groups));
1176 groups[n] = (unsigned short) (p - s);
1177 s = p + 1;
1179 else if (!ISDIGIT (*p))
1180 break;
1181 /* mem[s..p]=digits only */
1183 /* n = number of separators in s..e */
1184 groups[++n] = (short) (p - s);
1185 /* n = groups in the number */
1186 if (n <= 1)
1187 return; /* Only one group of numbers... not enough */
1188 p = nls_grouping;
1189 /* p = address of group rules
1190 s = address of next character after separator */
1191 s = s - 1; /* s = address of last separator */
1192 if (p && *p)
1194 /* a legal trailing group, iff groups[n] == first rule */
1195 if (groups[n] != (short) *p)
1196 nls_set_fraction (*s);
1197 else
1199 if (n == 2)
1200 { /* Only two groups */
1201 if (groups[n - 1] > max (p[0], p[1]))
1202 nls_set_fraction (*s);
1203 return;
1205 /* if the separators are the same, it's a thousands */
1206 if (*s != *(s - groups[n]))
1207 nls_set_fraction (*s);
1208 /* s[0] = thousands separator */
1209 else if (*s == th_sep)
1210 nls_fraction_found = 1;
1213 else
1215 /* no grouping allowed here, last separator IS decimal point */
1216 nls_set_fraction (*s);
1220 static int
1221 numcompare (register const char *a, register const char *b)
1223 int ret_code = 1; /* normal return status, see later in code */
1224 int diff = 0; /* difference between two digits */
1226 while (blanks[UCHAR (*a)])
1227 ++a;
1228 while (blanks[UCHAR (*b)])
1229 ++b;
1231 /* next character in a,b is non-blank */
1232 if ((*a == NEGATION_SIGN || *b == NEGATION_SIGN) && *a != *b)
1234 /* a < 0, or b < 0, but not both */
1235 if (*a == NEGATION_SIGN)
1236 ret_code = -1, ++a; /* a looks < b */
1237 else if (*b == NEGATION_SIGN)
1238 ret_code = 1, ++b; /* b looks < a */
1239 /* bypass zeroes, decimal points, and thousand sep in a & b */
1240 while (*a == NUMERIC_ZERO || (th_sep && *a == th_sep)
1241 || *a == decimal_point)
1242 ++a;
1244 while (*b == NUMERIC_ZERO || (th_sep && *b == th_sep)
1245 || *b == decimal_point)
1246 ++b;
1248 if (ISDIGIT (*a) || ISDIGIT (*b))
1249 /* here, either a or b or both are digits
1250 if a and b are digits, the signed one is the lesser.
1251 if a is a digit, and not b.. it means b==0, and if b==0
1252 than either is signed if b is signed then -0 < a
1253 or if a is signed then -a < 0. The ret_code is already set
1254 to mark that the signed number is the lesser, so we just
1255 return that number here. */
1256 return ret_code;
1258 /* *a and *b are neither digits, they are equal -0 == +0 */
1259 return 0;
1261 else
1263 /* either both numbers are signed, or both are not-signed */
1264 if (*a == NEGATION_SIGN)
1266 ++a;
1267 ++b;
1268 ret_code = -1;
1270 /* if both are signed, then remember -100 < -10 (ret_code reversed!) */
1272 /* Skip any leading zeroes */
1273 while (*a == NUMERIC_ZERO)
1274 ++a;
1275 while (*b == NUMERIC_ZERO)
1276 ++b;
1278 continue_thousands:
1280 /* skip all equal digits */
1281 while (ISDIGIT (*a) && ISDIGIT (*b) && *a == *b)
1282 a++, b++;
1284 /* Here, we have either different digits, or possible fractions
1285 or thousand separators. */
1287 if (ISDIGIT (*a) && ISDIGIT (*b))
1289 if (diff == 0)
1290 diff = ((*a) - (*b)); /* simple, isn't it? not quite */
1291 a++, b++;
1292 goto continue_thousands;
1295 /* now, here either may be a fraction, or a thousand separator...
1296 or both. */
1297 /* We've decided what are decimal_points, and what are thousands sep */
1298 if ((th_sep != 0) && (*a == th_sep || *b == th_sep))
1300 if (*a == th_sep)
1301 ++a;
1302 if (*b == th_sep)
1303 ++b;
1304 goto continue_thousands; /* Ugly, but better than a while(1) */
1307 if (ISDIGIT (*a))
1308 return ret_code; /* a has more digits than b */
1309 if (ISDIGIT (*b))
1310 return ret_code * -1; /* b has more digits than a */
1312 /* now, we should have the fractions solved */
1313 if ((diff == 0) && (*a == decimal_point || *b == decimal_point))
1314 return ret_code * fraccompare (a, b);
1316 return diff; /* fall through here, and diff decides */
1319 #else
1320 static int
1321 numcompare (register const char *a, register const char *b)
1323 register int tmpa, tmpb, loga, logb, tmp;
1325 tmpa = UCHAR (*a);
1326 tmpb = UCHAR (*b);
1328 while (blanks[tmpa])
1329 tmpa = UCHAR (*++a);
1330 while (blanks[tmpb])
1331 tmpb = UCHAR (*++b);
1333 if (tmpa == NEGATION_SIGN)
1336 tmpa = UCHAR (*++a);
1337 while (tmpa == NUMERIC_ZERO);
1338 if (tmpb != NEGATION_SIGN)
1340 if (tmpa == decimal_point)
1342 tmpa = *++a;
1343 while (tmpa == NUMERIC_ZERO);
1344 if (ISDIGIT (tmpa))
1345 return -1;
1346 while (tmpb == NUMERIC_ZERO)
1347 tmpb = UCHAR (*++b);
1348 if (tmpb == decimal_point)
1350 tmpb = *++b;
1351 while (tmpb == NUMERIC_ZERO);
1352 if (ISDIGIT (tmpb))
1353 return -1;
1354 return 0;
1357 tmpb = UCHAR (*++b);
1358 while (tmpb == NUMERIC_ZERO);
1360 while (tmpa == tmpb && ISDIGIT (tmpa))
1361 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
1363 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1364 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1365 return -fraccompare (a, b);
1367 if (ISDIGIT (tmpa))
1368 for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga)
1370 else
1371 loga = 0;
1373 if (ISDIGIT (tmpb))
1374 for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb)
1376 else
1377 logb = 0;
1379 if ((tmp = logb - loga) != 0)
1380 return tmp;
1382 if (!loga)
1383 return 0;
1385 return tmpb - tmpa;
1387 else if (tmpb == NEGATION_SIGN)
1390 tmpb = UCHAR (*++b);
1391 while (tmpb == NUMERIC_ZERO);
1392 if (tmpb == decimal_point)
1394 tmpb = *++b;
1395 while (tmpb == NUMERIC_ZERO);
1396 if (ISDIGIT (tmpb))
1397 return 1;
1398 while (tmpa == NUMERIC_ZERO)
1399 tmpa = UCHAR (*++a);
1400 if (tmpa == decimal_point)
1402 tmpa = UCHAR (*++a);
1403 while (tmpa == NUMERIC_ZERO);
1404 if (ISDIGIT (tmpa))
1405 return 1;
1406 return 0;
1408 else
1410 while (tmpa == NUMERIC_ZERO)
1411 tmpa = UCHAR (*++a);
1412 while (tmpb == NUMERIC_ZERO)
1413 tmpb = UCHAR (*++b);
1415 while (tmpa == tmpb && ISDIGIT (tmpa))
1416 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
1418 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1419 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1420 return fraccompare (a, b);
1422 if (ISDIGIT (tmpa))
1423 for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga)
1425 else
1426 loga = 0;
1428 if (ISDIGIT (tmpb))
1429 for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb)
1431 else
1432 logb = 0;
1434 if ((tmp = loga - logb) != 0)
1435 return tmp;
1437 if (!loga)
1438 return 0;
1440 return tmpa - tmpb;
1443 #endif
1445 static int
1446 general_numcompare (const char *sa, const char *sb)
1448 double a, b;
1449 /* FIXME: add option to warn about failed conversions. */
1450 /* FIXME: maybe add option to try expensive FP conversion
1451 only if A and B can't be compared more cheaply/accurately. */
1452 if (xstrtod (sa, NULL, &a))
1454 a = 0;
1456 if (xstrtod (sb, NULL, &b))
1458 b = 0;
1460 return a == b ? 0 : a < b ? -1 : 1;
1463 /* Return an integer in 1..12 of the month name S with length LEN.
1464 Return 0 if the name in S is not recognized. */
1466 static int
1467 getmonth (const char *s, int len)
1469 char *month;
1470 register int i, lo = 0, hi = 12, result;
1472 while (len > 0 && blanks[UCHAR (*s)])
1474 ++s;
1475 --len;
1478 if (len == 0)
1479 return 0;
1481 month = (char *) alloca (len + 1);
1482 for (i = 0; i < len; ++i)
1483 month[i] = fold_toupper[UCHAR (s[i])];
1484 while (blanks[UCHAR (month[i - 1])])
1485 --i;
1486 month[i] = '\0';
1490 int ix = (lo + hi) / 2;
1492 len = strlen (monthtab[ix].name);
1493 if (NLS_STRNCMP (month, monthtab[ix].name, len) < 0)
1494 hi = ix;
1495 else
1496 lo = ix;
1498 while (hi - lo > 1);
1500 result = (!strncmp (month, monthtab[lo].name, len) ? monthtab[lo].val : 0);
1502 return result;
1505 #ifdef ENABLE_NLS
1506 /* Look for the month in locale table, and if that fails try with
1507 us month name table */
1508 static int
1509 nls_month_is_either_locale (const char *s, int len)
1511 int ind;
1513 monthtab = nls_monthtab;
1514 ind = getmonth (s, len);
1515 if (ind == 0)
1517 monthtab = us_monthtab;
1518 ind = getmonth (s, len);
1520 return ind;
1522 #endif
1524 /* Compare two lines A and B trying every key in sequence until there
1525 are no more keys or a difference is found. */
1527 static int
1528 keycompare (const struct line *a, const struct line *b)
1530 register char *texta, *textb, *lima, *limb;
1531 register unsigned char *translate;
1532 register int *ignore;
1533 struct keyfield *key;
1534 int diff = 0, iter = 0, lena, lenb;
1536 for (key = keyhead.next; key; key = key->next, ++iter)
1538 int comparable_lengths = 1;
1540 ignore = key->ignore;
1541 translate = (unsigned char *) key->translate;
1543 /* Find the beginning and limit of each field. */
1544 if (iter || a->keybeg == NULL || b->keybeg == NULL)
1546 if (key->eword >= 0)
1547 lima = limfield (a, key), limb = limfield (b, key);
1548 else
1549 lima = a->text + a->length, limb = b->text + b->length;
1551 if (key->sword >= 0)
1552 texta = begfield (a, key), textb = begfield (b, key);
1553 else
1555 texta = a->text, textb = b->text;
1556 if (key->skipsblanks)
1558 while (texta < lima && blanks[UCHAR (*texta)])
1559 ++texta;
1560 while (textb < limb && blanks[UCHAR (*textb)])
1561 ++textb;
1565 else
1567 /* For the first iteration only, the key positions have
1568 been precomputed for us. */
1569 texta = a->keybeg, lima = a->keylim;
1570 textb = b->keybeg, limb = b->keylim;
1573 /* Find the lengths. */
1574 lena = lima - texta, lenb = limb - textb;
1575 if (lena < 0)
1576 lena = 0;
1577 if (lenb < 0)
1578 lenb = 0;
1580 if (key->skipeblanks)
1582 char *a_end = texta + lena;
1583 char *b_end = textb + lenb;
1584 trim_trailing_blanks (texta, &a_end);
1585 trim_trailing_blanks (textb, &b_end);
1586 lena = a_end - texta;
1587 lenb = b_end - textb;
1590 /* Actually compare the fields. */
1591 if (key->numeric)
1593 if (*lima || *limb)
1595 char savea = *lima, saveb = *limb;
1597 *lima = *limb = '\0';
1598 diff = numcompare (texta, textb);
1599 *lima = savea, *limb = saveb;
1601 else
1602 diff = numcompare (texta, textb);
1604 if (diff)
1605 return key->reverse ? -diff : diff;
1606 continue;
1608 else if (key->general_numeric)
1610 if (*lima || *limb)
1612 char savea = *lima, saveb = *limb;
1614 *lima = *limb = '\0';
1615 diff = general_numcompare (texta, textb);
1616 *lima = savea, *limb = saveb;
1618 else
1619 diff = general_numcompare (texta, textb);
1621 if (diff)
1622 return key->reverse ? -diff : diff;
1623 continue;
1625 else if (key->month)
1627 #ifdef ENABLE_NLS
1629 /* if we haven't decided which locale to go with, we get the
1630 month name from either. If either month name is fully
1631 solved and the month name doesn't collide with the other
1632 locale... then use that table from there forward */
1633 if (!nls_month_found)
1635 int x;
1637 x = nls_month_is_either_locale (texta, lena);
1638 nls_month_found = !nls_months_collide[x];
1639 if (nls_month_found)
1641 diff = x - getmonth (textb, lenb);
1643 else
1645 diff = nls_month_is_either_locale (textb, lenb);
1646 nls_month_found = !nls_months_collide[diff];
1647 diff = x - diff;
1650 else
1651 #endif
1652 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1653 if (diff)
1654 return key->reverse ? -diff : diff;
1655 continue;
1657 #ifdef ENABLE_NLS
1659 /* Sorting like this may become slow, so in a simple locale the user
1660 can select a faster sort that is similar to ascii sort */
1661 else if (need_locale)
1663 /* FIXME: consider making parameters non-const, then when
1664 both ignore and translate are NULL (which should be most
1665 of the time) we could temporarily NUL-terminate them in
1666 place and avoid the copy. */
1668 char *copy_a = (char *) alloca (lena + 1);
1669 char *copy_b = (char *) alloca (lenb + 1);
1670 int new_len_a, new_len_b, i;
1672 /* We can't use strcoll directly on the two strings,
1673 but rather must extract the text for the key
1674 (to NUL-terminate for strcoll) and handle any
1675 'ignore' and/or 'translate' before comparing. */
1676 for (new_len_a = new_len_b = i = 0; i < max (lena, lenb); i++)
1678 if (i < lena)
1680 copy_a[new_len_a] = (translate
1681 ? translate[UCHAR (texta[i])]
1682 : texta[i]);
1683 if (!ignore || !ignore[UCHAR (texta[i])])
1684 ++new_len_a;
1686 if (i < lenb)
1688 copy_b[new_len_b] = (translate
1689 ? translate[UCHAR (textb[i])]
1690 : textb [i]);
1691 if (!ignore || !ignore[UCHAR (textb[i])])
1692 ++new_len_b;
1696 copy_a[new_len_a] = copy_b[new_len_b] = 0;
1698 diff = strcoll (copy_a, copy_b);
1700 /* Free copy_a and copy_b. */
1701 alloca (0);
1703 if (diff)
1704 return key->reverse ? -diff : diff;
1706 continue;
1708 #endif
1709 else if (ignore && translate)
1711 #define CMP_WITH_IGNORE(A, B) \
1712 do \
1714 while (texta < lima && textb < limb) \
1716 while (texta < lima && ignore[UCHAR (*texta)]) \
1717 ++texta; \
1718 while (textb < limb && ignore[UCHAR (*textb)]) \
1719 ++textb; \
1720 if (texta < lima && textb < limb) \
1722 if ((A) != (B)) \
1724 diff = UCHAR (A) - UCHAR (B); \
1725 break; \
1727 ++texta; \
1728 ++textb; \
1731 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1732 diff = -1; \
1733 else if (texta < lima && textb == limb \
1734 && !ignore[UCHAR (*texta)]) \
1735 diff = 1; \
1738 if (diff == 0) \
1740 while (texta < lima && ignore[UCHAR (*texta)]) \
1741 ++texta; \
1742 while (textb < limb && ignore[UCHAR (*textb)]) \
1743 ++textb; \
1745 if (texta == lima && textb < limb) \
1746 diff = -1; \
1747 else if (texta < lima && textb == limb) \
1748 diff = 1; \
1750 /* Relative lengths are meaningless if characters were ignored. \
1751 Handling this case here avoids what might be an invalid length \
1752 comparison below. */ \
1753 if (diff == 0 && texta == lima && textb == limb) \
1754 comparable_lengths = 0; \
1756 while (0)
1758 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1759 else if (ignore)
1760 CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1761 else if (translate)
1762 while (texta < lima && textb < limb)
1764 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1766 diff = (UCHAR (translate[UCHAR (*--texta)])
1767 - UCHAR (translate[UCHAR (*--textb)]));
1768 break;
1771 else
1773 diff = NLS_MEMCMP (texta, textb, min (lena, lenb));
1776 if (diff)
1777 return key->reverse ? -diff : diff;
1778 if (comparable_lengths && (diff = lena - lenb) != 0)
1779 return key->reverse ? -diff : diff;
1782 return 0;
1785 /* Compare two lines A and B, returning negative, zero, or positive
1786 depending on whether A compares less than, equal to, or greater than B. */
1788 static int
1789 compare (register const struct line *a, register const struct line *b)
1791 int diff, tmpa, tmpb, mini;
1793 /* First try to compare on the specified keys (if any).
1794 The only two cases with no key at all are unadorned sort,
1795 and unadorned sort -r. */
1796 if (keyhead.next)
1798 diff = keycompare (a, b);
1799 if (diff != 0)
1800 return diff;
1801 if (unique || stable)
1802 return 0;
1805 /* If the keys all compare equal (or no keys were specified)
1806 fall through to the default byte-by-byte comparison. */
1807 tmpa = a->length, tmpb = b->length;
1808 mini = min (tmpa, tmpb);
1809 if (mini == 0)
1810 diff = tmpa - tmpb;
1811 else
1813 char *ap = a->text, *bp = b->text;
1815 #ifdef ENABLE_NLS
1816 if (need_locale) /* want absolutely correct sorting */
1818 diff = strcoll (ap, bp);
1819 return reverse ? -diff : diff;
1821 #endif
1822 diff = UCHAR (*ap) - UCHAR (*bp);
1823 if (diff == 0)
1825 diff = NLS_MEMCMP (ap, bp, mini);
1826 if (diff == 0)
1827 diff = tmpa - tmpb;
1831 return reverse ? -diff : diff;
1834 /* Check that the lines read from the given FP come in order. Print a
1835 diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1836 the line number of the first out-of-order line (counting from 1) if they
1837 are not in order. Otherwise, print no diagnostic and return zero. */
1839 static int
1840 checkfp (FILE *fp, const char *file_name)
1842 struct buffer buf; /* Input buffer. */
1843 struct lines lines; /* Lines scanned from the buffer. */
1844 struct line temp; /* Copy of previous line. */
1845 int cc; /* Character count. */
1846 int alloc;
1847 int line_number = 1;
1848 struct line *disorder_line;
1849 int disorder_line_number = 0;
1851 #ifdef lint /* Suppress `used before initialized' warning. */
1852 disorder_line = NULL;
1853 #endif
1855 initbuf (&buf, mergealloc);
1856 initlines (&lines, mergealloc / linelength + 1,
1857 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1858 alloc = linelength;
1859 temp.text = xmalloc (alloc);
1861 cc = fillbuf (&buf, fp);
1862 if (cc == 0)
1863 goto finish;
1865 findlines (&buf, &lines);
1867 while (1)
1869 struct line *prev_line; /* Pointer to previous line. */
1870 int cmp; /* Result of calling compare. */
1871 int i;
1873 /* Compare each line in the buffer with its successor. */
1874 for (i = 0; i < lines.used - 1; ++i)
1876 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1877 if ((unique && cmp >= 0) || (cmp > 0))
1879 disorder_line = &lines.lines[i + 1];
1880 disorder_line_number = line_number + i + 1;
1881 goto finish;
1885 line_number += lines.used;
1887 /* Save the last line of the buffer and refill the buffer. */
1888 prev_line = lines.lines + (lines.used - 1);
1889 if (prev_line->length + 1 > alloc)
1893 alloc *= 2;
1895 while (alloc < prev_line->length + 1);
1896 temp.text = xrealloc (temp.text, alloc);
1898 assert (prev_line->length + 1 <= alloc);
1899 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1900 temp.length = prev_line->length;
1901 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1902 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1904 cc = fillbuf (&buf, fp);
1905 if (cc == 0)
1906 break;
1908 findlines (&buf, &lines);
1909 /* Make sure the line saved from the old buffer contents is
1910 less than or equal to the first line of the new buffer. */
1911 cmp = compare (&temp, &lines.lines[0]);
1912 if ((unique && cmp >= 0) || (cmp > 0))
1914 disorder_line = &lines.lines[0];
1915 disorder_line_number = line_number;
1916 break;
1920 finish:
1921 xfclose (fp);
1923 if (disorder_line_number)
1925 fprintf (stderr, _("%s: %s:%d: disorder: "), program_name, file_name,
1926 disorder_line_number);
1927 write_bytes (disorder_line->text, disorder_line->length, stderr);
1928 putc (eolchar, stderr);
1931 free (buf.buf);
1932 free ((char *) lines.lines);
1933 free (temp.text);
1934 return disorder_line_number;
1937 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1938 Close FPS before returning. */
1940 static void
1941 mergefps (FILE **fps, register int nfps, FILE *ofp)
1943 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1944 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1945 struct line saved; /* Saved line for unique check. */
1946 int savedflag = 0; /* True if there is a saved line. */
1947 int savealloc; /* Size allocated for the saved line. */
1948 int cur[NMERGE]; /* Current line in each line table. */
1949 int ord[NMERGE]; /* Table representing a permutation of fps,
1950 such that lines[ord[0]].lines[cur[ord[0]]]
1951 is the smallest line and will be next
1952 output. */
1953 register int i, j, t;
1955 #ifdef lint /* Suppress `used before initialized' warning. */
1956 savealloc = 0;
1957 #endif
1959 /* Allocate space for a saved line if necessary. */
1960 if (unique)
1962 savealloc = linelength;
1963 saved.text = xmalloc (savealloc);
1966 /* Read initial lines from each input file. */
1967 for (i = 0; i < nfps; ++i)
1969 initbuf (&buffer[i], mergealloc);
1970 /* If a file is empty, eliminate it from future consideration. */
1971 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1973 xfclose (fps[i]);
1974 --nfps;
1975 for (j = i; j < nfps; ++j)
1976 fps[j] = fps[j + 1];
1978 if (i == nfps)
1979 free (buffer[i].buf);
1980 else
1982 initlines (&lines[i], mergealloc / linelength + 1,
1983 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1984 findlines (&buffer[i], &lines[i]);
1985 cur[i] = 0;
1989 /* Set up the ord table according to comparisons among input lines.
1990 Since this only reorders two items if one is strictly greater than
1991 the other, it is stable. */
1992 for (i = 0; i < nfps; ++i)
1993 ord[i] = i;
1994 for (i = 1; i < nfps; ++i)
1995 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1996 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1997 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1999 /* Repeatedly output the smallest line until no input remains. */
2000 while (nfps)
2002 /* If uniqified output is turned on, output only the first of
2003 an identical series of lines. */
2004 if (unique)
2006 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
2008 write_bytes (saved.text, saved.length, ofp);
2009 putc (eolchar, ofp);
2010 savedflag = 0;
2012 if (!savedflag)
2014 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
2016 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
2017 savealloc *= 2;
2018 saved.text = xrealloc (saved.text, savealloc);
2020 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
2021 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
2022 saved.length + 1);
2023 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
2025 saved.keybeg = saved.text +
2026 (lines[ord[0]].lines[cur[ord[0]]].keybeg
2027 - lines[ord[0]].lines[cur[ord[0]]].text);
2029 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
2031 saved.keylim = saved.text +
2032 (lines[ord[0]].lines[cur[ord[0]]].keylim
2033 - lines[ord[0]].lines[cur[ord[0]]].text);
2035 savedflag = 1;
2038 else
2040 write_bytes (lines[ord[0]].lines[cur[ord[0]]].text,
2041 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
2042 putc (eolchar, ofp);
2045 /* Check if we need to read more lines into core. */
2046 if (++cur[ord[0]] == lines[ord[0]].used)
2048 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
2050 findlines (&buffer[ord[0]], &lines[ord[0]]);
2051 cur[ord[0]] = 0;
2053 else
2055 /* We reached EOF on fps[ord[0]]. */
2056 for (i = 1; i < nfps; ++i)
2057 if (ord[i] > ord[0])
2058 --ord[i];
2059 --nfps;
2060 xfclose (fps[ord[0]]);
2061 free (buffer[ord[0]].buf);
2062 free ((char *) lines[ord[0]].lines);
2063 for (i = ord[0]; i < nfps; ++i)
2065 fps[i] = fps[i + 1];
2066 buffer[i] = buffer[i + 1];
2067 lines[i] = lines[i + 1];
2068 cur[i] = cur[i + 1];
2070 for (i = 0; i < nfps; ++i)
2071 ord[i] = ord[i + 1];
2072 continue;
2076 /* The new line just read in may be larger than other lines
2077 already in core; push it back in the queue until we encounter
2078 a line larger than it. */
2079 for (i = 1; i < nfps; ++i)
2081 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
2082 &lines[ord[i]].lines[cur[ord[i]]]);
2083 if (!t)
2084 t = ord[0] - ord[i];
2085 if (t < 0)
2086 break;
2088 t = ord[0];
2089 for (j = 1; j < i; ++j)
2090 ord[j - 1] = ord[j];
2091 ord[i - 1] = t;
2094 if (unique && savedflag)
2096 write_bytes (saved.text, saved.length, ofp);
2097 putc (eolchar, ofp);
2098 free (saved.text);
2102 #ifdef ENABLE_NLS
2104 /* Find the numeric format that this file represents to us for sorting. */
2105 static void
2106 nls_numeric_format (const struct line *line, int nlines)
2108 struct nls_keyfield *n_key = nls_keyhead;
2110 /* line = first line, nlines = number of lines,
2111 nls_fraction_found = false */
2112 for (; !nls_fraction_found && nlines > 0; line++, nlines--)
2114 int iter;
2115 for (iter = 0; !nls_fraction_found; iter++)
2117 char *text;
2118 char *lim;
2119 struct keyfield *key = n_key->key;
2121 /* text = {}, lim = {}, key = first key */
2122 if (iter || line->keybeg == NULL)
2124 /* Succeding keys, where the key field is
2125 specified */
2126 if (key->eword >= 0) /* key->eword = length of key */
2127 lim = limfield (line, key);
2128 else
2129 lim = line->text + line->length;
2130 /* lim = end of key field */
2132 if (key->sword >= 0) /* key->sword = start of key */
2133 text = begfield (line, key);
2134 else
2135 text = line->text;
2136 /* text = start of field */
2138 else
2140 /* First key is always the whole line */
2141 text = line->keybeg;
2142 lim = line->keylim;
2144 /* text = start of text to sort
2145 lim = end of text to sort */
2147 look_for_fraction (text, lim);
2149 /* nls_fraction_found = decimal_point found? */
2151 if ((n_key = n_key->next) == nls_keyhead)
2152 break; /* No more keys for this line */
2155 nls_fraction_found = 1;
2156 /* decide on current decimal_point known */
2159 #endif
2161 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
2163 static void
2164 sortlines (struct line *lines, int nlines, struct line *temp)
2166 register struct line *lo, *hi, *t;
2167 register int nlo, nhi;
2169 if (nlines == 2)
2171 if (compare (&lines[0], &lines[1]) > 0)
2173 *temp = lines[0];
2174 lines[0] = lines[1];
2175 lines[1] = *temp;
2177 return;
2180 nlo = nlines / 2;
2181 lo = lines;
2182 nhi = nlines - nlo;
2183 hi = lines + nlo;
2185 if (nlo > 1)
2186 sortlines (lo, nlo, temp);
2188 if (nhi > 1)
2189 sortlines (hi, nhi, temp);
2191 t = temp;
2193 while (nlo && nhi)
2194 if (compare (lo, hi) <= 0)
2195 *t++ = *lo++, --nlo;
2196 else
2197 *t++ = *hi++, --nhi;
2198 while (nlo--)
2199 *t++ = *lo++;
2201 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
2202 *lo++ = *t++;
2205 /* Check that each of the NFILES FILES is ordered.
2206 Return a count of disordered files. */
2208 static int
2209 check (char **files, int nfiles)
2211 int i, disorders = 0;
2212 FILE *fp;
2214 for (i = 0; i < nfiles; ++i)
2216 fp = xfopen (files[i], "r");
2217 if (checkfp (fp, files[i]))
2219 ++disorders;
2222 return disorders;
2225 /* Merge NFILES FILES onto OFP. */
2227 static void
2228 merge (char **files, int nfiles, FILE *ofp)
2230 int i, j, t;
2231 char *temp;
2232 FILE *fps[NMERGE], *tfp;
2234 while (nfiles > NMERGE)
2236 t = 0;
2237 for (i = 0; i < nfiles / NMERGE; ++i)
2239 for (j = 0; j < NMERGE; ++j)
2240 fps[j] = xfopen (files[i * NMERGE + j], "r");
2241 tfp = xtmpfopen (temp = tempname ());
2242 mergefps (fps, NMERGE, tfp);
2243 xfclose (tfp);
2244 for (j = 0; j < NMERGE; ++j)
2245 zaptemp (files[i * NMERGE + j]);
2246 files[t++] = temp;
2248 for (j = 0; j < nfiles % NMERGE; ++j)
2249 fps[j] = xfopen (files[i * NMERGE + j], "r");
2250 tfp = xtmpfopen (temp = tempname ());
2251 mergefps (fps, nfiles % NMERGE, tfp);
2252 xfclose (tfp);
2253 for (j = 0; j < nfiles % NMERGE; ++j)
2254 zaptemp (files[i * NMERGE + j]);
2255 files[t++] = temp;
2256 nfiles = t;
2259 for (i = 0; i < nfiles; ++i)
2260 fps[i] = xfopen (files[i], "r");
2261 mergefps (fps, i, ofp);
2262 for (i = 0; i < nfiles; ++i)
2263 zaptemp (files[i]);
2266 /* Sort NFILES FILES onto OFP. */
2268 static void
2269 sort (char **files, int nfiles, FILE *ofp)
2271 struct buffer buf;
2272 struct lines lines;
2273 struct line *tmp;
2274 int i, ntmp;
2275 FILE *fp, *tfp;
2276 struct tempnode *node;
2277 int n_temp_files = 0;
2278 char **tempfiles;
2280 initbuf (&buf, sortalloc);
2281 initlines (&lines, sortalloc / linelength + 1,
2282 LINEALLOC / sizeof (struct line));
2283 ntmp = lines.alloc;
2284 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
2286 while (nfiles--)
2288 fp = xfopen (*files++, "r");
2289 while (fillbuf (&buf, fp))
2291 findlines (&buf, &lines);
2292 if (lines.used > ntmp)
2294 while (lines.used > ntmp)
2295 ntmp *= 2;
2296 tmp = (struct line *)
2297 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
2299 #ifdef ENABLE_NLS
2300 if (nls_keyhead)
2301 nls_keyhead = nls_keyhead->next;
2302 if (!nls_fraction_found && nls_keyhead)
2303 nls_numeric_format (lines.lines, lines.used);
2304 #endif
2305 sortlines (lines.lines, lines.used, tmp);
2306 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
2308 tfp = ofp;
2310 else
2312 ++n_temp_files;
2313 tfp = xtmpfopen (tempname ());
2315 for (i = 0; i < lines.used; ++i)
2316 if (!unique || i == 0
2317 || compare (&lines.lines[i], &lines.lines[i - 1]))
2319 write_bytes (lines.lines[i].text, lines.lines[i].length, tfp);
2320 putc (eolchar, tfp);
2322 if (tfp != ofp)
2323 xfclose (tfp);
2325 xfclose (fp);
2328 free (buf.buf);
2329 free ((char *) lines.lines);
2330 free ((char *) tmp);
2332 if (n_temp_files)
2334 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
2335 i = n_temp_files;
2336 for (node = temphead.next; i > 0; node = node->next)
2337 tempfiles[--i] = node->name;
2338 merge (tempfiles, n_temp_files, ofp);
2339 free ((char *) tempfiles);
2343 /* Insert key KEY at the end of the list (`keyhead'). */
2345 static void
2346 insertkey (struct keyfield *key)
2348 struct keyfield *k = &keyhead;
2350 while (k->next)
2351 k = k->next;
2352 k->next = key;
2353 key->next = NULL;
2354 #ifdef ENABLE_NLS
2355 if (key->numeric || key->general_numeric)
2357 struct nls_keyfield *nk;
2359 nk = (struct nls_keyfield *) xmalloc (sizeof (struct nls_keyfield));
2360 nk->key = key;
2361 if (nls_keyhead)
2363 nk->next = nls_keyhead->next;
2364 nls_keyhead->next = nk;
2366 else
2367 nk->next = nk;
2368 nls_keyhead = nk;
2370 #endif
2373 static void
2374 badfieldspec (const char *s)
2376 error (SORT_FAILURE, 0, _("invalid field specification `%s'"), s);
2379 /* Handle interrupts and hangups. */
2381 static void
2382 sighandler (int sig)
2384 #ifdef SA_INTERRUPT
2385 struct sigaction sigact;
2387 sigact.sa_handler = SIG_DFL;
2388 sigemptyset (&sigact.sa_mask);
2389 sigact.sa_flags = 0;
2390 sigaction (sig, &sigact, NULL);
2391 #else /* !SA_INTERRUPT */
2392 signal (sig, SIG_DFL);
2393 #endif /* SA_INTERRUPT */
2394 cleanup ();
2395 kill (getpid (), sig);
2398 /* Set the ordering options for KEY specified in S.
2399 Return the address of the first character in S that
2400 is not a valid ordering option.
2401 BLANKTYPE is the kind of blanks that 'b' should skip. */
2403 static char *
2404 set_ordering (register const char *s, struct keyfield *key,
2405 enum blanktype blanktype)
2407 while (*s)
2409 switch (*s)
2411 case 'b':
2412 if (blanktype == bl_start || blanktype == bl_both)
2413 key->skipsblanks = 1;
2414 if (blanktype == bl_end || blanktype == bl_both)
2415 key->skipeblanks = 1;
2416 break;
2417 case 'd':
2418 key->ignore = nondictionary;
2419 break;
2420 case 'f':
2421 key->translate = fold_toupper;
2422 break;
2423 case 'g':
2424 key->general_numeric = 1;
2425 break;
2426 case 'i':
2427 key->ignore = nonprinting;
2428 break;
2429 case 'M':
2430 key->month = 1;
2431 break;
2432 case 'n':
2433 key->numeric = 1;
2434 break;
2435 case 'r':
2436 key->reverse = 1;
2437 break;
2438 default:
2439 return (char *) s;
2441 ++s;
2443 return (char *) s;
2446 static void
2447 key_init (struct keyfield *key)
2449 memset (key, 0, sizeof (*key));
2450 key->eword = -1;
2453 /* strdup and return the result of setlocale, but guard against a NULL
2454 return value. If setlocale returns NULL, strdup FAIL_VAL instead. */
2456 #if defined ENABLE_NLS && ( !defined __GLIBC__ || __GLIBC__ < 2 )
2457 static inline char *
2458 my_setlocale (const char *locale, const char *fail_val)
2460 char *s = setlocale (LC_ALL, locale);
2461 if (s == NULL)
2462 s = (char *) fail_val;
2463 return xstrdup (s);
2465 #endif
2468 main (int argc, char **argv)
2470 struct keyfield *key = NULL, gkey;
2471 char *s;
2472 int i, t, t2;
2473 int checkonly = 0, mergeonly = 0, nfiles = 0;
2474 char *minus = "-", *outfile = minus, **files, *tmp;
2475 FILE *ofp;
2476 #ifdef SA_INTERRUPT
2477 struct sigaction oldact, newact;
2478 #endif /* SA_INTERRUPT */
2480 program_name = argv[0];
2482 #ifdef ENABLE_NLS
2484 /* Determine whether the current locale is C or POSIX. */
2485 # if defined __GLIBC__ && __GLIBC__ >= 2
2486 s = setlocale (LC_ALL, "");
2487 if (s != NULL && !STREQ (s, "C") && !STREQ (s, "POSIX"))
2488 /* The current locale is neither C nor POSIX. We'll need to do
2489 more work. */
2490 need_locale = 1;
2491 # else
2493 char *c_locale_string = my_setlocale ("C", "");
2494 char *posix_locale_string = my_setlocale ("POSIX", "");
2495 char *current_locale_string = setlocale (LC_ALL, "");
2497 if (current_locale_string != NULL
2498 && !STREQ (current_locale_string, c_locale_string)
2499 && !STREQ (current_locale_string, posix_locale_string))
2500 /* The current locale is neither C nor POSIX.
2501 We'll need to do more work. */
2502 need_locale = 1;
2504 free (c_locale_string);
2505 free (posix_locale_string);
2507 # endif
2509 /* Let's get locale's representation of the decimal point */
2511 struct lconv *lconvp = localeconv ();
2513 decimal_point = *lconvp->decimal_point;
2514 th_sep = *lconvp->thousands_sep;
2515 nls_grouping = (char *) (lconvp->grouping);
2518 /* if locale doesn't define a decimal point, we'll use the
2519 US notation. */
2520 if (decimal_point == '\0')
2521 decimal_point = FLOATING_POINT;
2522 else
2523 nls_fraction_found = 0; /* Figure out which decimal point to use */
2525 nls_month_found = 0; /* Figure out which month notation to use */
2527 monthtab = nls_monthtab;
2529 #endif /* NLS */
2531 bindtextdomain (PACKAGE, LOCALEDIR);
2532 textdomain (PACKAGE);
2534 parse_long_options (argc, argv, PROGRAM_NAME, GNU_PACKAGE, VERSION,
2535 AUTHORS, usage);
2537 have_read_stdin = 0;
2538 inittables ();
2540 temp_dir = getenv ("TMPDIR");
2541 if (temp_dir == NULL)
2542 temp_dir = DEFAULT_TMPDIR;
2544 /* Change the way xmalloc and xrealloc fail. */
2545 xalloc_exit_failure = SORT_FAILURE;
2546 xalloc_fail_func = cleanup;
2548 #ifdef SA_INTERRUPT
2549 newact.sa_handler = sighandler;
2550 sigemptyset (&newact.sa_mask);
2551 newact.sa_flags = 0;
2553 sigaction (SIGINT, NULL, &oldact);
2554 if (oldact.sa_handler != SIG_IGN)
2555 sigaction (SIGINT, &newact, NULL);
2556 sigaction (SIGHUP, NULL, &oldact);
2557 if (oldact.sa_handler != SIG_IGN)
2558 sigaction (SIGHUP, &newact, NULL);
2559 sigaction (SIGPIPE, NULL, &oldact);
2560 if (oldact.sa_handler != SIG_IGN)
2561 sigaction (SIGPIPE, &newact, NULL);
2562 sigaction (SIGTERM, NULL, &oldact);
2563 if (oldact.sa_handler != SIG_IGN)
2564 sigaction (SIGTERM, &newact, NULL);
2565 #else /* !SA_INTERRUPT */
2566 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
2567 signal (SIGINT, sighandler);
2568 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
2569 signal (SIGHUP, sighandler);
2570 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
2571 signal (SIGPIPE, sighandler);
2572 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
2573 signal (SIGTERM, sighandler);
2574 #endif /* !SA_INTERRUPT */
2576 gkey.sword = gkey.eword = -1;
2577 gkey.ignore = NULL;
2578 gkey.translate = NULL;
2579 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
2580 gkey.skipsblanks = gkey.skipeblanks = 0;
2582 files = (char **) xmalloc (sizeof (char *) * argc);
2584 for (i = 1; i < argc; ++i)
2586 if (argv[i][0] == '+')
2588 if (key)
2589 insertkey (key);
2590 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
2591 key_init (key);
2592 s = argv[i] + 1;
2593 if (! (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1]))))
2594 badfieldspec (argv[i]);
2595 for (t = 0; ISDIGIT (*s); ++s)
2596 t = 10 * t + *s - '0';
2597 t2 = 0;
2598 if (*s == '.')
2599 for (++s; ISDIGIT (*s); ++s)
2600 t2 = 10 * t2 + *s - '0';
2601 if (t2 || t)
2603 key->sword = t;
2604 key->schar = t2;
2606 else
2607 key->sword = -1;
2608 s = set_ordering (s, key, bl_start);
2609 if (*s)
2610 badfieldspec (argv[i]);
2612 else if (argv[i][0] == '-' && argv[i][1])
2614 s = argv[i] + 1;
2615 if (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1])))
2617 if (!key)
2619 /* Provoke with `sort -9'. */
2620 error (0, 0, _("when using the old-style +POS and -POS \
2621 key specifiers,\nthe +POS specifier must come first"));
2622 usage (SORT_FAILURE);
2624 for (t = 0; ISDIGIT (*s); ++s)
2625 t = t * 10 + *s - '0';
2626 t2 = 0;
2627 if (*s == '.')
2628 for (++s; ISDIGIT (*s); ++s)
2629 t2 = t2 * 10 + *s - '0';
2630 key->eword = t;
2631 key->echar = t2;
2632 s = set_ordering (s, key, bl_end);
2633 if (*s)
2634 badfieldspec (argv[i]);
2635 insertkey (key);
2636 key = NULL;
2638 else
2639 while (*s)
2641 s = set_ordering (s, &gkey, bl_both);
2642 switch (*s)
2644 case '\0':
2645 break;
2646 case 'c':
2647 checkonly = 1;
2648 break;
2649 case 'k':
2650 if (s[1])
2651 ++s;
2652 else
2654 if (i == argc - 1)
2655 error (SORT_FAILURE, 0,
2656 _("option `-k' requires an argument"));
2657 else
2658 s = argv[++i];
2660 if (key)
2661 insertkey (key);
2662 key = (struct keyfield *)
2663 xmalloc (sizeof (struct keyfield));
2664 key_init (key);
2665 /* Get POS1. */
2666 if (!ISDIGIT (*s))
2667 badfieldspec (argv[i]);
2668 for (t = 0; ISDIGIT (*s); ++s)
2669 t = 10 * t + *s - '0';
2670 if (t == 0)
2672 /* Provoke with `sort -k0' */
2673 error (0, 0, _("the starting field number argument \
2674 to the `-k' option must be positive"));
2675 badfieldspec (argv[i]);
2677 --t;
2678 t2 = 0;
2679 if (*s == '.')
2681 if (!ISDIGIT (s[1]))
2683 /* Provoke with `sort -k1.' */
2684 error (0, 0, _("starting field spec has `.' but \
2685 lacks following character offset"));
2686 badfieldspec (argv[i]);
2688 for (++s; ISDIGIT (*s); ++s)
2689 t2 = 10 * t2 + *s - '0';
2690 if (t2 == 0)
2692 /* Provoke with `sort -k1.0' */
2693 error (0, 0, _("starting field character offset \
2694 argument to the `-k' option\nmust be positive"));
2695 badfieldspec (argv[i]);
2697 --t2;
2699 if (t2 || t)
2701 key->sword = t;
2702 key->schar = t2;
2704 else
2705 key->sword = -1;
2706 s = set_ordering (s, key, bl_start);
2707 if (*s == 0)
2709 key->eword = -1;
2710 key->echar = 0;
2712 else if (*s != ',')
2713 badfieldspec (argv[i]);
2714 else if (*s == ',')
2716 /* Skip over comma. */
2717 ++s;
2718 if (*s == 0)
2720 /* Provoke with `sort -k1,' */
2721 error (0, 0, _("field specification has `,' but \
2722 lacks following field spec"));
2723 badfieldspec (argv[i]);
2725 /* Get POS2. */
2726 for (t = 0; ISDIGIT (*s); ++s)
2727 t = t * 10 + *s - '0';
2728 if (t == 0)
2730 /* Provoke with `sort -k1,0' */
2731 error (0, 0, _("ending field number argument \
2732 to the `-k' option must be positive"));
2733 badfieldspec (argv[i]);
2735 --t;
2736 t2 = 0;
2737 if (*s == '.')
2739 if (!ISDIGIT (s[1]))
2741 /* Provoke with `sort -k1,1.' */
2742 error (0, 0, _("ending field spec has `.' \
2743 but lacks following character offset"));
2744 badfieldspec (argv[i]);
2746 for (++s; ISDIGIT (*s); ++s)
2747 t2 = t2 * 10 + *s - '0';
2749 else
2751 /* `-k 2,3' is equivalent to `+1 -3'. */
2752 ++t;
2754 key->eword = t;
2755 key->echar = t2;
2756 s = set_ordering (s, key, bl_end);
2757 if (*s)
2758 badfieldspec (argv[i]);
2760 insertkey (key);
2761 key = NULL;
2762 goto outer;
2763 case 'm':
2764 mergeonly = 1;
2765 break;
2766 case 'o':
2767 if (s[1])
2768 outfile = s + 1;
2769 else
2771 if (i == argc - 1)
2772 error (SORT_FAILURE, 0,
2773 _("option `-o' requires an argument"));
2774 else
2775 outfile = argv[++i];
2777 goto outer;
2778 case 's':
2779 stable = 1;
2780 break;
2781 case 't':
2782 if (s[1])
2783 tab = *++s;
2784 else if (i < argc - 1)
2786 tab = *argv[++i];
2787 goto outer;
2789 else
2790 error (SORT_FAILURE, 0,
2791 _("option `-t' requires an argument"));
2792 break;
2793 case 'T':
2794 if (s[1])
2795 temp_dir = ++s;
2796 else
2798 if (i < argc - 1)
2799 temp_dir = argv[++i];
2800 else
2801 error (SORT_FAILURE, 0,
2802 _("option `-T' requires an argument"));
2804 goto outer;
2805 /* break; */
2806 case 'u':
2807 unique = 1;
2808 break;
2809 case 'z':
2810 eolchar = 0;
2811 break;
2812 case 'y':
2813 /* Accept and ignore e.g. -y0 for compatibility with
2814 Solaris 2. */
2815 goto outer;
2816 default:
2817 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2818 argv[0], *s);
2819 usage (SORT_FAILURE);
2821 if (*s)
2822 ++s;
2825 else /* Not an option. */
2827 files[nfiles++] = argv[i];
2829 outer:;
2832 if (key)
2833 insertkey (key);
2835 /* Inheritance of global options to individual keys. */
2836 for (key = keyhead.next; key; key = key->next)
2837 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2838 && !key->skipeblanks && !key->month && !key->numeric
2839 && !key->general_numeric)
2841 key->ignore = gkey.ignore;
2842 key->translate = gkey.translate;
2843 key->skipsblanks = gkey.skipsblanks;
2844 key->skipeblanks = gkey.skipeblanks;
2845 key->month = gkey.month;
2846 key->numeric = gkey.numeric;
2847 key->general_numeric = gkey.general_numeric;
2848 key->reverse = gkey.reverse;
2851 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2852 || gkey.skipeblanks || gkey.month || gkey.numeric
2853 || gkey.general_numeric))
2854 insertkey (&gkey);
2855 reverse = gkey.reverse;
2857 if (nfiles == 0)
2859 nfiles = 1;
2860 files = &minus;
2863 if (checkonly)
2865 /* POSIX requires that sort return 1 IFF invoked with -c and the
2866 input is not properly sorted. */
2867 exit (check (files, nfiles) == 0 ? 0 : 1);
2870 if (!STREQ (outfile, "-"))
2872 struct stat outstat;
2873 if (stat (outfile, &outstat) == 0)
2875 /* The following code prevents a race condition when
2876 people use the brain dead shell programming idiom:
2877 cat file | sort -o file
2878 This feature is provided for historical compatibility,
2879 but we strongly discourage ever relying on this in
2880 new shell programs. */
2882 /* Temporarily copy each input file that might be another name
2883 for the output file. When in doubt (e.g. a pipe), copy. */
2884 for (i = 0; i < nfiles; ++i)
2886 char buf[8192];
2887 FILE *in_fp;
2888 FILE *out_fp;
2889 int cc;
2891 if (S_ISREG (outstat.st_mode) && !STREQ (outfile, files[i]))
2893 struct stat instat;
2894 if ((STREQ (files[i], "-")
2895 ? fstat (STDIN_FILENO, &instat)
2896 : stat (files[i], &instat)) != 0)
2898 error (0, errno, "%s", files[i]);
2899 cleanup ();
2900 exit (SORT_FAILURE);
2902 if (S_ISREG (instat.st_mode) && !SAME_INODE (instat, outstat))
2904 /* We know the files are distinct. */
2905 continue;
2909 in_fp = xfopen (files[i], "r");
2910 tmp = tempname ();
2911 out_fp = xtmpfopen (tmp);
2912 /* FIXME: maybe use copy.c(copy) here. */
2913 while ((cc = fread (buf, 1, sizeof buf, in_fp)) > 0)
2914 write_bytes (buf, cc, out_fp);
2915 if (ferror (in_fp))
2917 error (0, errno, "%s", files[i]);
2918 cleanup ();
2919 exit (SORT_FAILURE);
2921 xfclose (out_fp);
2922 xfclose (in_fp);
2923 files[i] = tmp;
2925 ofp = xfopen (outfile, "w");
2927 else
2929 /* A non-`-' outfile was specified, but the file doesn't yet exist.
2930 Before opening it for writing (thus creating it), make sure all
2931 of the input files exist. Otherwise, creating the output file
2932 could create an otherwise missing input file, making sort succeed
2933 when it should fail. */
2934 for (i = 0; i < nfiles; ++i)
2936 struct stat sb;
2937 if (STREQ (files[i], "-"))
2938 continue;
2939 if (stat (files[i], &sb))
2941 error (0, errno, "%s", files[i]);
2942 cleanup ();
2943 exit (SORT_FAILURE);
2947 ofp = xfopen (outfile, "w");
2950 else
2952 ofp = stdout;
2955 if (mergeonly)
2956 merge (files, nfiles, ofp);
2957 else
2958 sort (files, nfiles, ofp);
2959 cleanup ();
2961 /* If we wait for the implicit flush on exit, and the parent process
2962 has closed stdout (e.g., exec >&- in a shell), then the output file
2963 winds up empty. I don't understand why. This is under SunOS,
2964 Solaris, Ultrix, and Irix. This premature fflush makes the output
2965 reappear. --karl@cs.umb.edu */
2966 if (fflush (ofp) < 0)
2967 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2969 if (have_read_stdin && fclose (stdin) == EOF)
2970 error (SORT_FAILURE, errno, "%s", outfile);
2971 if (ferror (stdout) || fclose (stdout) == EOF)
2972 error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2974 exit (EXIT_SUCCESS);