1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 88, 1991-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
26 #include <sys/types.h>
31 #include "long-options.h"
36 /* The official name of this program (e.g., no `g' prefix). */
37 #define PROGRAM_NAME "sort"
39 #define AUTHORS "Mike Haertel"
41 #if defined ENABLE_NLS && HAVE_LANGINFO_H
42 # include <langinfo.h>
45 #if HAVE_PATHCONF && defined _PC_NAME_MAX
46 # define NAME_MAX_IN_DIR(Dir) pathconf (Dir, _PC_NAME_MAX)
48 # define NAME_MAX_IN_DIR(Dir) 255
53 /* Undefine, to avoid warning about redefinition on some systems. */
55 #define min(a, b) ((a) < (b) ? (a) : (b))
57 #define max(a, b) ((a) > (b) ? (a) : (b))
59 #define UCHAR_LIM (UCHAR_MAX + 1)
60 #define UCHAR(c) ((unsigned char) (c))
62 #ifndef DEFAULT_TMPDIR
63 # define DEFAULT_TMPDIR "/tmp"
66 /* Use this as exit status in case of error, not EXIT_FAILURE. This
67 is necessary because EXIT_FAILURE is usually 1 and POSIX requires
68 that sort exit with status 1 IFF invoked with -c and the input is
69 not properly sorted. Any other irregular exit must exit with a
70 status code greater than 1. */
71 #define SORT_FAILURE 2
73 #define FLOATING_POINT '.'
74 #define FLOATING_COMMA ','
75 #define NEGATION_SIGN '-'
76 #define NUMERIC_ZERO '0'
79 # define NLS_MEMCMP(S1, S2, Len) strncoll (S1, S2, Len)
80 # define NLS_STRNCMP(S1, S2, Len) strncoll_s2_readonly (S1, S2, Len)
82 # define NLS_MEMCMP(S1, S2, Len) memcmp (S1, S2, Len)
83 # define NLS_STRNCMP(S1, S2, Len) strncmp (S1, S2, Len)
88 static unsigned char decimal_point
;
89 static unsigned char th_sep
;
90 static char *nls_grouping
;
92 /* This is "C" locale, need another? */
93 static int need_locale
= 0;
95 /* Should we look for decimal point? */
96 static int nls_fraction_found
= 1;
98 /* Look for month notations in text? */
99 static int nls_month_found
= 1;
103 # define decimal_point FLOATING_POINT
107 /* If native language support is requested, make a 1-1 map to the
108 locale character map, otherwise ensure normal behavior. */
111 /* 12 months in a year */
112 # define NLS_NUM_MONTHS 12
114 /* Maximum number of elements, to allocate per allocation unit */
115 # define NLS_MAX_GROUPS 8
117 /* A string with one character, to enforce char collation */
118 # define NLS_ONE_CHARACTER_STRING " "
122 /* The kind of blanks for '-b' to skip in various options. */
123 enum blanktype
{ bl_start
, bl_end
, bl_both
};
125 /* The character marking end of line. Default to \n. */
128 /* Lines are held in core as counted strings. */
131 char *text
; /* Text of the line. */
132 int length
; /* Length not including final newline. */
133 char *keybeg
; /* Start of first key. */
134 char *keylim
; /* Limit of first key. */
137 /* Arrays of lines. */
140 struct line
*lines
; /* Dynamically allocated array of lines. */
141 int used
; /* Number of slots used. */
142 int alloc
; /* Number of slots allocated. */
143 int limit
; /* Max number of slots to allocate. */
149 char *buf
; /* Dynamically allocated buffer. */
150 int used
; /* Number of bytes used. */
151 int alloc
; /* Number of bytes allocated. */
152 int left
; /* Number of bytes left after line parsing. */
157 int sword
; /* Zero-origin 'word' to start at. */
158 int schar
; /* Additional characters to skip. */
159 int skipsblanks
; /* Skip leading white space at start. */
160 int eword
; /* Zero-origin first word after field. */
161 int echar
; /* Additional characters in field. */
162 int skipeblanks
; /* Skip trailing white space at finish. */
163 int *ignore
; /* Boolean array of characters to ignore. */
164 char *translate
; /* Translation applied to characters. */
165 int numeric
; /* Flag for numeric comparison. Handle
166 strings of digits with optional decimal
167 point, but no exponential notation. */
168 int general_numeric
; /* Flag for general, numeric comparison.
169 Handle numbers in exponential notation. */
170 int month
; /* Flag for comparison by month name. */
171 int reverse
; /* Reverse the sense of comparison. */
172 struct keyfield
*next
; /* Next keyfield to try. */
181 /* The name this program was run with. */
184 /* Table of white space. */
185 static int blanks
[UCHAR_LIM
];
187 /* Table of non-printing characters. */
188 static int nonprinting
[UCHAR_LIM
];
190 /* Table of non-dictionary characters (not letters, digits, or blanks). */
191 static int nondictionary
[UCHAR_LIM
];
193 /* Translation table folding lower case to upper. */
194 static char fold_toupper
[UCHAR_LIM
];
196 /* Table mapping 3-letter month names to integers.
197 Alphabetic order allows binary search. */
198 static const struct month us_monthtab
[] =
216 /* Locale may have a different idea of month names */
217 static struct month nls_monthtab
[NLS_NUM_MONTHS
];
218 static int nls_months_collide
[NLS_NUM_MONTHS
+ 1];
220 /* Numeric keys, to search for numeric format */
223 struct keyfield
*key
;
224 struct nls_keyfield
*next
;
227 static struct nls_keyfield
*nls_keyhead
= NULL
;
231 /* Which month table to use in the program, default C */
232 static const struct month
*monthtab
= us_monthtab
;
234 /* During the merge phase, the number of files to merge at once. */
237 /* Initial buffer size for in core sorting. Will not grow unless a
238 line longer than this is seen. */
239 static int sortalloc
= 512 * 1024;
241 /* Initial buffer size for in core merge buffers. Bear in mind that
242 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
243 static int mergealloc
= 16 * 1024;
245 /* Guess of average line length. */
246 static int linelength
= 30;
248 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
249 #define LINEALLOC (256 * 1024)
251 /* Directory in which any temporary files are to be created. */
252 static char *temp_dir
;
254 /* Flag to reverse the order of all comparisons. */
257 /* Flag for stable sort. This turns off the last ditch bytewise
258 comparison of lines, and instead leaves lines in the same order
259 they were read if all keys compare equal. */
262 /* Tab character separating fields. If NUL, then fields are separated
263 by the empty string between a non-whitespace character and a whitespace
267 /* Flag to remove consecutive duplicate lines from the output.
268 Only the last of a sequence of equal lines will be output. */
271 /* Nonzero if any of the input files are the standard input. */
272 static int have_read_stdin
;
274 /* Lists of key field comparisons to be tried. */
275 static struct keyfield keyhead
;
281 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
286 Usage: %s [OPTION]... [FILE]...\n\
290 Write sorted concatenation of all FILE(s) to standard output.\n\
292 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
293 -b ignore leading blanks in sort fields or keys\n\
294 -c check if given files already sorted, do not sort\n\
295 -d consider only [a-zA-Z0-9 ] characters in keys\n\
296 -f fold lower case to upper case characters in keys\n\
297 -g compare according to general numerical value, imply -b\n\
298 -i consider only [\\040-\\0176] characters in keys\n\
299 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
300 -m merge already sorted files, do not sort\n\
301 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
302 -n compare according to string numerical value, imply -b\n\
303 -o FILE write result on FILE instead of standard output\n\
304 -r reverse the result of comparisons\n\
305 -s stabilize sort by disabling last resort comparison\n\
306 -t SEP use SEParator instead of non- to whitespace transition\n\
307 -T DIRECTORY use DIRECTORY for temporary files, not $TMPDIR or %s\n\
308 -u with -c, check for strict ordering;\n\
309 with -m, only output the first of an equal sequence\n\
310 -z end lines with 0 byte, not newline, for find -print0\n\
311 --help display this help and exit\n\
312 --version output version information and exit\n\
314 POS is F[.C][OPTS], where F is the field number and C the character\n\
315 position in the field, both counted from zero. OPTS is made up of one\n\
316 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
317 for that key. If no key given, use the entire line as key. With no\n\
318 FILE, or when FILE is -, read standard input.\n\
321 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
323 /* Don't use EXIT_FAILURE here in case it is defined to be 1.
324 POSIX requires that sort return 1 IFF invoked with -c and
325 the input is not properly sorted. */
326 assert (status
== 0 || status
== SORT_FAILURE
);
330 /* The list of temporary files. */
331 static struct tempnode
334 struct tempnode
*next
;
337 /* Clean up any remaining temporary files. */
342 struct tempnode
*node
;
344 for (node
= temphead
.next
; node
; node
= node
->next
)
349 xtmpfopen (const char *file
)
354 /* Open temporary file exclusively, to foil a common
355 denial-of-service attack. */
356 fd
= open (file
, O_WRONLY
| O_CREAT
| O_TRUNC
| O_EXCL
, 0600);
357 if (fd
< 0 || (fp
= fdopen (fd
, "w")) == NULL
)
359 error (0, errno
, "%s", file
);
368 xfopen (const char *file
, const char *how
)
372 if (STREQ (file
, "-"))
378 if ((fp
= fopen (file
, how
)) == NULL
)
380 error (0, errno
, "%s", file
);
396 /* Allow reading stdin from tty more than once. */
400 else if (fp
== stdout
)
402 if (fflush (fp
) != 0)
404 error (0, errno
, _("flushing file"));
411 if (fclose (fp
) != 0)
413 error (0, errno
, _("error closing file"));
421 write_bytes (const char *buf
, size_t n_bytes
, FILE *fp
)
423 if (fwrite (buf
, 1, n_bytes
, fp
) != n_bytes
)
425 error (0, errno
, _("write error"));
431 /* Return a name for a temporary file. */
436 static unsigned int seq
;
437 int len
= strlen (temp_dir
);
438 char *name
= xmalloc (len
+ 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
439 int long_file_names
= NAME_MAX_IN_DIR (temp_dir
) > 12;
440 struct tempnode
*node
;
442 /* If long filenames aren't supported, we cannot use filenames
443 longer than 8+3 and still assume they are unique. */
446 "%s%ssort%5.5d%5.5d",
448 (len
&& temp_dir
[len
- 1] != '/') ? "/" : "",
449 (unsigned int) getpid () & 0xffff, seq
);
451 sprintf (name
, "%s%ss%5.5d%2.2d.%3.3d",
453 (len
&& temp_dir
[len
- 1] != '/') ? "/" : "",
454 (unsigned int) getpid () & 0xffff, seq
/ 1000, seq
% 1000);
458 /* Make sure that SEQ's value fits in 5 digits if temp_dir is on
459 an 8.3 filesystem. */
460 if (!long_file_names
&& seq
>= 100000)
463 node
= (struct tempnode
*) xmalloc (sizeof (struct tempnode
));
465 node
->next
= temphead
.next
;
466 temphead
.next
= node
;
470 /* Search through the list of temporary files for NAME;
471 remove it if it is found on the list. */
474 zaptemp (const char *name
)
476 struct tempnode
*node
, *temp
;
478 for (node
= &temphead
; node
->next
; node
= node
->next
)
479 if (STREQ (name
, node
->next
->name
))
486 node
->next
= temp
->next
;
487 free ((char *) temp
);
492 /* Initialize the character class tables. */
495 nls_sort_month_comp (const void *m1
, const void *m2
)
497 return strcoll (((const struct month
*) m1
)->name
,
498 ((const struct month
*) m2
)->name
);
501 /* Do collation on strings S1 and S2, but for at most L characters.
502 we use the fact, that we KNOW that LEN is the min of the two lengths */
504 strncoll (char *s1
, char *s2
, int len
)
510 /* Emulate a strncoll function, by forcing strcoll to compare
511 only the first LEN characters in each string. */
512 register unsigned char n1
= s1
[len
];
513 register unsigned char n2
= s2
[len
];
515 s1
[len
] = s2
[len
] = 0;
516 diff
= strcoll (s1
, s2
);
522 diff
= memcmp (s1
, s2
, len
);
528 /* Do collation on strings S1 and S2, but for at most L characters.
529 Use the fact, that we KNOW that S2 is the shorter string and has
532 strncoll_s2_readonly (char *s1
, const char *s2
, int len
)
536 assert (len
== strlen (s2
));
537 assert (len
<= strlen (s1
));
541 /* Emulate a strncoll function, by forcing strcoll to compare
542 only the first LEN characters in each string. */
543 register unsigned char n1
= s1
[len
];
546 diff
= strcoll (s1
, s2
);
551 diff
= memcmp (s1
, s2
, len
);
564 for (i
= 0; i
< UCHAR_LIM
; ++i
)
570 if (!ISALNUM (i
) && !ISBLANK (i
))
571 nondictionary
[i
] = 1;
573 fold_toupper
[i
] = toupper (i
);
578 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
579 /* If We're not in the "C" locale, read in different names for months. */
582 nls_months_collide
[0] = 1; /* if an error, look again */
583 for (i
= 0; i
< NLS_NUM_MONTHS
; i
++)
589 s
= (char *) nl_langinfo (ABMON_1
+ us_monthtab
[i
].val
- 1);
591 nls_monthtab
[i
].name
= (char *) xmalloc (s_len
+ 1);
592 nls_monthtab
[i
].val
= us_monthtab
[i
].val
;
594 /* Be careful: abreviated month names
595 may be longer than the usual 3 characters. */
596 for (j
= 0; j
< s_len
; j
++)
597 nls_monthtab
[i
].name
[j
] = fold_toupper
[UCHAR (s
[j
])];
598 nls_monthtab
[i
].name
[j
] = '\0';
600 nls_months_collide
[nls_monthtab
[i
].val
] = 0;
601 for (j
= 0; j
< NLS_NUM_MONTHS
; ++j
)
603 if (STREQ (nls_monthtab
[i
].name
, us_monthtab
[i
].name
))
605 /* There are indeed some month names in English which
606 collide with the NLS name. */
607 nls_months_collide
[nls_monthtab
[i
].val
] = 1;
612 /* Now quicksort the month table (should be sorted already!).
613 However, another locale doesn't rule out the possibility
614 of a different order of month names. */
615 qsort ((void *) nls_monthtab
, NLS_NUM_MONTHS
,
616 sizeof (struct month
), nls_sort_month_comp
);
617 monthtab
= nls_monthtab
;
622 /* Initialize BUF, allocating ALLOC bytes initially. */
625 initbuf (struct buffer
*buf
, int alloc
)
628 buf
->buf
= xmalloc (buf
->alloc
);
629 buf
->used
= buf
->left
= 0;
632 /* Fill BUF reading from FP, moving buf->left bytes from the end
633 of buf->buf to the beginning first. If EOF is reached and the
634 file wasn't terminated by a newline, supply one. Return a count
635 of bytes buffered. */
638 fillbuf (struct buffer
*buf
, FILE *fp
)
642 memmove (buf
->buf
, buf
->buf
+ buf
->used
- buf
->left
, buf
->left
);
643 buf
->used
= buf
->left
;
645 while (!feof (fp
) && (buf
->used
== 0
646 || !memchr (buf
->buf
, eolchar
, buf
->used
)))
648 if (buf
->used
== buf
->alloc
)
651 buf
->buf
= xrealloc (buf
->buf
, buf
->alloc
);
653 cc
= fread (buf
->buf
+ buf
->used
, 1, buf
->alloc
- buf
->used
, fp
);
656 error (0, errno
, _("read error"));
663 if (feof (fp
) && buf
->used
&& buf
->buf
[buf
->used
- 1] != eolchar
)
665 if (buf
->used
== buf
->alloc
)
668 buf
->buf
= xrealloc (buf
->buf
, buf
->alloc
);
670 buf
->buf
[buf
->used
++] = eolchar
;
676 /* Initialize LINES, allocating space for ALLOC lines initially.
677 LIMIT is the maximum possible number of lines to allocate space
681 initlines (struct lines
*lines
, int alloc
, int limit
)
683 lines
->alloc
= alloc
;
684 lines
->lines
= (struct line
*) xmalloc (lines
->alloc
* sizeof (struct line
));
686 lines
->limit
= limit
;
689 /* Return a pointer to the first character of the field specified
693 begfield (const struct line
*line
, const struct keyfield
*key
)
695 register char *ptr
= line
->text
, *lim
= ptr
+ line
->length
;
696 register int sword
= key
->sword
, schar
= key
->schar
;
699 while (ptr
< lim
&& sword
--)
701 while (ptr
< lim
&& *ptr
!= tab
)
707 while (ptr
< lim
&& sword
--)
709 while (ptr
< lim
&& blanks
[UCHAR (*ptr
)])
711 while (ptr
< lim
&& !blanks
[UCHAR (*ptr
)])
715 if (key
->skipsblanks
)
716 while (ptr
< lim
&& blanks
[UCHAR (*ptr
)])
719 if (ptr
+ schar
<= lim
)
727 /* Return the limit of (a pointer to the first character after) the field
728 in LINE specified by KEY. */
731 limfield (const struct line
*line
, const struct keyfield
*key
)
733 register char *ptr
= line
->text
, *lim
= ptr
+ line
->length
;
734 register int eword
= key
->eword
, echar
= key
->echar
;
736 /* Note: from the POSIX spec:
737 The leading field separator itself is included in
738 a field when -t is not used. FIXME: move this comment up... */
740 /* Move PTR past EWORD fields or to one past the last byte on LINE,
741 whichever comes first. If there are more than EWORD fields, leave
742 PTR pointing at the beginning of the field having zero-based index,
743 EWORD. If a delimiter character was specified (via -t), then that
744 `beginning' is the first character following the delimiting TAB.
745 Otherwise, leave PTR pointing at the first `blank' character after
746 the preceding field. */
748 while (ptr
< lim
&& eword
--)
750 while (ptr
< lim
&& *ptr
!= tab
)
752 if (ptr
< lim
&& (eword
|| echar
> 0))
756 while (ptr
< lim
&& eword
--)
758 while (ptr
< lim
&& blanks
[UCHAR (*ptr
)])
760 while (ptr
< lim
&& !blanks
[UCHAR (*ptr
)])
764 #ifdef POSIX_UNSPECIFIED
765 /* The following block of code makes GNU sort incompatible with
766 standard Unix sort, so it's ifdef'd out for now.
767 The POSIX spec isn't clear on how to interpret this.
768 FIXME: request clarification.
770 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
771 Date: Thu, 30 May 96 12:20:41 -0400
773 [...]I believe I've found another bug in `sort'.
778 $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
781 $ /bin/sort +0.6 -0.7 </tmp/sort.in
785 Unix sort produced the answer I expected: sort on the single character
786 in column 6. GNU sort produced different results, because it disagrees
787 on the interpretation of the key-end spec "-M.N". Unix sort reads this
788 as "skip M fields, then N characters"; but GNU sort wants it to mean
789 "skip M fields, then either N characters or the rest of the current
790 field, whichever comes first". This extra clause applies only to
791 key-ends, not key-starts.
794 /* Make LIM point to the end of (one byte past) the current field. */
798 newlim
= memchr (ptr
, tab
, lim
- ptr
);
806 while (newlim
< lim
&& blanks
[UCHAR (*newlim
)])
808 while (newlim
< lim
&& !blanks
[UCHAR (*newlim
)])
814 /* If we're skipping leading blanks, don't start counting characters
815 until after skipping past any leading blanks. */
816 if (key
->skipsblanks
)
817 while (ptr
< lim
&& blanks
[UCHAR (*ptr
)])
820 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
821 if (ptr
+ echar
<= lim
)
832 trim_trailing_blanks (const char *a_start
, char **a_end
)
834 while (*a_end
> a_start
&& blanks
[UCHAR (*(*a_end
- 1))])
838 /* Find the lines in BUF, storing pointers and lengths in LINES.
839 Also replace newlines in BUF with NULs. */
842 findlines (struct buffer
*buf
, struct lines
*lines
)
844 register char *beg
= buf
->buf
, *lim
= buf
->buf
+ buf
->used
, *ptr
;
845 struct keyfield
*key
= keyhead
.next
;
849 while (beg
< lim
&& (ptr
= memchr (beg
, eolchar
, lim
- beg
))
850 && lines
->used
< lines
->limit
)
852 /* There are various places in the code that rely on a NUL
853 being at the end of in-core lines; NULs inside the lines
854 will not cause trouble, though. */
857 if (lines
->used
== lines
->alloc
)
860 lines
->lines
= (struct line
*)
861 xrealloc ((char *) lines
->lines
,
862 lines
->alloc
* sizeof (struct line
));
865 lines
->lines
[lines
->used
].text
= beg
;
866 lines
->lines
[lines
->used
].length
= ptr
- beg
;
868 /* Precompute the position of the first key for efficiency. */
872 lines
->lines
[lines
->used
].keylim
=
873 limfield (&lines
->lines
[lines
->used
], key
);
875 lines
->lines
[lines
->used
].keylim
= ptr
;
878 lines
->lines
[lines
->used
].keybeg
=
879 begfield (&lines
->lines
[lines
->used
], key
);
882 if (key
->skipsblanks
)
883 while (blanks
[UCHAR (*beg
)])
885 lines
->lines
[lines
->used
].keybeg
= beg
;
887 if (key
->skipeblanks
)
889 trim_trailing_blanks (lines
->lines
[lines
->used
].keybeg
,
890 &lines
->lines
[lines
->used
].keylim
);
895 lines
->lines
[lines
->used
].keybeg
= 0;
896 lines
->lines
[lines
->used
].keylim
= 0;
903 buf
->left
= lim
- beg
;
906 /* Compare strings A and B containing decimal fractions < 1. Each string
907 should begin with a decimal point followed immediately by the digits
908 of the fraction. Strings not of this form are considered to be zero. */
910 /* The goal here, is to take two numbers a and b... compare these
911 in parallel. Instead of converting each, and then comparing the
912 outcome. Most likely stopping the comparison before the conversion
913 is complete. The algorithm used, in the old sort:
915 Algorithm: fraccompare
916 Action : compare two decimal fractions
917 accepts : char *a, char *b
918 returns : -1 if a<b, 0 if a=b, 1 if a>b.
921 if *a == decimal_point AND *b == decimal_point
922 find first character different in a and b.
923 if both are digits, return the difference *a - *b.
926 if digit return 1, else 0
929 if digit return -1, else 0
930 if *a is a decimal_point
931 skip past decimal_point and zeroes
932 if digit return 1, else 0
933 if *b is a decimal_point
934 skip past decimal_point and zeroes
935 if digit return -1, else 0
938 The above implementation duplicates code, and thus there is room
940 the difference in code of a and b, is solved by using a
941 reference to s, assigned to either a or b. and using diff
942 to denote return value.
943 the difference in either that start being a digit or
944 the decimal point, is solved by testing if either is
945 a decimal point, or if the other is a digit...
947 if *a or *b is a decimal_point
948 skip all chars where *a == *b
949 if *a and *b are digits
951 if *a is a digit or *a is a decimal_point
957 skip decimal_point in s
963 #define USE_NEW_FRAC_COMPARE
964 #ifdef USE_NEW_FRAC_COMPARE
967 fraccompare (register const char *a
, register const char *b
)
970 nls_fraction_found
= 1;
973 if (*a
== decimal_point
|| *b
== decimal_point
)
975 register const char *s
;
986 if (ISDIGIT (*a
) && ISDIGIT (*b
))
989 if (*a
== decimal_point
|| (ISDIGIT (*a
) && *b
!= decimal_point
))
999 if (*s
== decimal_point
)
1001 while (*s
== NUMERIC_ZERO
)
1011 fraccompare (register const char *a
, register const char *b
)
1013 register int tmpa
= *a
;
1014 register int tmpb
= *b
;
1016 if (tmpa
== decimal_point
&& tmpb
== decimal_point
)
1019 tmpa
= *++a
, tmpb
= *++b
;
1020 while (tmpa
== tmpb
&& ISDIGIT (tmpa
));
1021 if (ISDIGIT (tmpa
) && ISDIGIT (tmpb
))
1025 while (tmpa
== NUMERIC_ZERO
)
1033 while (tmpb
== NUMERIC_ZERO
)
1041 else if (tmpa
== decimal_point
)
1045 while (tmpa
== NUMERIC_ZERO
);
1050 else if (tmpb
== decimal_point
)
1054 while (tmpb
== NUMERIC_ZERO
);
1063 /* Compare strings A and B as numbers without explicitly converting them to
1064 machine numbers. Comparatively slow for short strings, but asymptotically
1067 /* The code here, is like the above... continuous reoccurrance of the
1068 same code... improved 15-JAN-1997 in connection with native languages
1073 /* Decide the kind of fraction the program will use */
1075 nls_set_fraction (register unsigned char ch
)
1077 if (!nls_fraction_found
&& ch
!= decimal_point
)
1079 if (ch
== FLOATING_POINT
)
1081 decimal_point
= FLOATING_POINT
;
1082 th_sep
= FLOATING_COMMA
;
1084 else if (ch
== FLOATING_COMMA
)
1086 decimal_point
= FLOATING_COMMA
;
1087 th_sep
= FLOATING_POINT
;
1089 else if (ch
!= decimal_point
)
1095 nls_fraction_found
= 1;
1098 /* Look for a fraction
1099 It isn't as simple as it looks... however, consider a number:
1102 It's easy to tell which is a decimal point, and which isn't. We use
1103 the grouping information to find out how many digits are grouped together
1104 for thousand separator.
1106 The idea here, is to use the grouping information... but not to
1107 spend time with verifying the groups... not too much time, anyway.
1108 so, a number represented to us as:
1110 will be taken and separated into different groups, separated by a
1111 separator character (Decimal point or thousands separator).
1113 these are the groups of digits that lead to a separator character,
1114 and with the trailing group is added:
1116 resulting in 4 groups of numbers. If the resulting number of groups,
1117 are none, or just 1... this is not enough to decide anything about
1118 the decimal point. We need at least two for that. With two groups
1119 we have at least one separator. That separator can be a decimal
1120 point, or a thousands separator... if it is a thousands separator
1121 the number of digits in the last group, will comply with the first
1122 rule in the grouping rule for numeric values. i.e.
1123 |{89}| = grouping[0]
1124 if so, and there are only two groups of numbers, the value cannot
1125 be determined. If there are three or more numbers, the separator
1126 separating the groups is checked. If these are the same, the
1127 character is determined to be a thousands separator. If they are
1128 not the same, the last separator is determined to be a decimal
1129 point. If checking the grouping rules, we find out that there
1130 are no grouping rules defined, either the grouping rules is NULL
1131 or the first grouping number is 0, then the locale format is used.
1133 We try to take an advantage of a special situation. If the trailing
1134 group, the one that normally should be the fractional part, turns
1135 out to have the same length as the thousands separator rule says,
1136 making a doubt on that it may be a decimal point, we look for the
1137 group before that, i.e. with a two group form:
1139 where the grouping rule is 3;3... we take a look at group 1, and find
1140 out that |{1234}| > larger of the two first grouping rules, then
1141 the separator has to be a decimal point...
1145 look_for_fraction (const char *s
, const char *e
)
1147 register const char *p
;
1148 register unsigned short n
= 0;
1149 static unsigned short max_groups
= 0;
1150 static unsigned short *groups
= NULL
;
1154 max_groups
= NLS_MAX_GROUPS
;
1155 groups
= (unsigned short *) xmalloc (sizeof (*groups
) * max_groups
);
1158 /* skip blanks and signs */
1159 while (blanks
[UCHAR (*s
)] || *s
== NEGATION_SIGN
)
1161 /* groups = {}, n = 0 */
1162 for (p
= s
; p
< e
; p
++)
1164 /* groups[n]={number of digits leading to separator n}
1165 n = number of separators so far */
1166 if (*p
== decimal_point
|| *p
== th_sep
|| *p
== FLOATING_POINT
)
1168 if (++n
>= max_groups
)
1170 /* BIG Number... enlarge table */
1171 max_groups
+= NLS_MAX_GROUPS
;
1172 groups
= (unsigned short *) xrealloc ((char *) groups
,
1176 groups
[n
] = (unsigned short) (p
- s
);
1179 else if (!ISDIGIT (*p
))
1181 /* mem[s..p]=digits only */
1183 /* n = number of separators in s..e */
1184 groups
[++n
] = (short) (p
- s
);
1185 /* n = groups in the number */
1187 return; /* Only one group of numbers... not enough */
1189 /* p = address of group rules
1190 s = address of next character after separator */
1191 s
= s
- 1; /* s = address of last separator */
1194 /* a legal trailing group, iff groups[n] == first rule */
1195 if (groups
[n
] != (short) *p
)
1196 nls_set_fraction (*s
);
1200 { /* Only two groups */
1201 if (groups
[n
- 1] > max (p
[0], p
[1]))
1202 nls_set_fraction (*s
);
1205 /* if the separators are the same, it's a thousands */
1206 if (*s
!= *(s
- groups
[n
]))
1207 nls_set_fraction (*s
);
1208 /* s[0] = thousands separator */
1209 else if (*s
== th_sep
)
1210 nls_fraction_found
= 1;
1215 /* no grouping allowed here, last separator IS decimal point */
1216 nls_set_fraction (*s
);
1221 numcompare (register const char *a
, register const char *b
)
1223 int ret_code
= 1; /* normal return status, see later in code */
1224 int diff
= 0; /* difference between two digits */
1226 while (blanks
[UCHAR (*a
)])
1228 while (blanks
[UCHAR (*b
)])
1231 /* next character in a,b is non-blank */
1232 if ((*a
== NEGATION_SIGN
|| *b
== NEGATION_SIGN
) && *a
!= *b
)
1234 /* a < 0, or b < 0, but not both */
1235 if (*a
== NEGATION_SIGN
)
1236 ret_code
= -1, ++a
; /* a looks < b */
1237 else if (*b
== NEGATION_SIGN
)
1238 ret_code
= 1, ++b
; /* b looks < a */
1239 /* bypass zeroes, decimal points, and thousand sep in a & b */
1240 while (*a
== NUMERIC_ZERO
|| (th_sep
&& *a
== th_sep
)
1241 || *a
== decimal_point
)
1244 while (*b
== NUMERIC_ZERO
|| (th_sep
&& *b
== th_sep
)
1245 || *b
== decimal_point
)
1248 if (ISDIGIT (*a
) || ISDIGIT (*b
))
1249 /* here, either a or b or both are digits
1250 if a and b are digits, the signed one is the lesser.
1251 if a is a digit, and not b.. it means b==0, and if b==0
1252 than either is signed if b is signed then -0 < a
1253 or if a is signed then -a < 0. The ret_code is already set
1254 to mark that the signed number is the lesser, so we just
1255 return that number here. */
1258 /* *a and *b are neither digits, they are equal -0 == +0 */
1263 /* either both numbers are signed, or both are not-signed */
1264 if (*a
== NEGATION_SIGN
)
1270 /* if both are signed, then remember -100 < -10 (ret_code reversed!) */
1272 /* Skip any leading zeroes */
1273 while (*a
== NUMERIC_ZERO
)
1275 while (*b
== NUMERIC_ZERO
)
1280 /* skip all equal digits */
1281 while (ISDIGIT (*a
) && ISDIGIT (*b
) && *a
== *b
)
1284 /* Here, we have either different digits, or possible fractions
1285 or thousand separators. */
1287 if (ISDIGIT (*a
) && ISDIGIT (*b
))
1290 diff
= ((*a
) - (*b
)); /* simple, isn't it? not quite */
1292 goto continue_thousands
;
1295 /* now, here either may be a fraction, or a thousand separator...
1297 /* We've decided what are decimal_points, and what are thousands sep */
1298 if ((th_sep
!= 0) && (*a
== th_sep
|| *b
== th_sep
))
1304 goto continue_thousands
; /* Ugly, but better than a while(1) */
1308 return ret_code
; /* a has more digits than b */
1310 return ret_code
* -1; /* b has more digits than a */
1312 /* now, we should have the fractions solved */
1313 if ((diff
== 0) && (*a
== decimal_point
|| *b
== decimal_point
))
1314 return ret_code
* fraccompare (a
, b
);
1316 return diff
; /* fall through here, and diff decides */
1321 numcompare (register const char *a
, register const char *b
)
1323 register int tmpa
, tmpb
, loga
, logb
, tmp
;
1328 while (blanks
[tmpa
])
1329 tmpa
= UCHAR (*++a
);
1330 while (blanks
[tmpb
])
1331 tmpb
= UCHAR (*++b
);
1333 if (tmpa
== NEGATION_SIGN
)
1336 tmpa
= UCHAR (*++a
);
1337 while (tmpa
== NUMERIC_ZERO
);
1338 if (tmpb
!= NEGATION_SIGN
)
1340 if (tmpa
== decimal_point
)
1343 while (tmpa
== NUMERIC_ZERO
);
1346 while (tmpb
== NUMERIC_ZERO
)
1347 tmpb
= UCHAR (*++b
);
1348 if (tmpb
== decimal_point
)
1351 while (tmpb
== NUMERIC_ZERO
);
1357 tmpb
= UCHAR (*++b
);
1358 while (tmpb
== NUMERIC_ZERO
);
1360 while (tmpa
== tmpb
&& ISDIGIT (tmpa
))
1361 tmpa
= UCHAR (*++a
), tmpb
= UCHAR (*++b
);
1363 if ((tmpa
== decimal_point
&& !ISDIGIT (tmpb
))
1364 || (tmpb
== decimal_point
&& !ISDIGIT (tmpa
)))
1365 return -fraccompare (a
, b
);
1368 for (loga
= 1; ISDIGIT (UCHAR (*++a
)); ++loga
)
1374 for (logb
= 1; ISDIGIT (UCHAR (*++b
)); ++logb
)
1379 if ((tmp
= logb
- loga
) != 0)
1387 else if (tmpb
== NEGATION_SIGN
)
1390 tmpb
= UCHAR (*++b
);
1391 while (tmpb
== NUMERIC_ZERO
);
1392 if (tmpb
== decimal_point
)
1395 while (tmpb
== NUMERIC_ZERO
);
1398 while (tmpa
== NUMERIC_ZERO
)
1399 tmpa
= UCHAR (*++a
);
1400 if (tmpa
== decimal_point
)
1402 tmpa
= UCHAR (*++a
);
1403 while (tmpa
== NUMERIC_ZERO
);
1410 while (tmpa
== NUMERIC_ZERO
)
1411 tmpa
= UCHAR (*++a
);
1412 while (tmpb
== NUMERIC_ZERO
)
1413 tmpb
= UCHAR (*++b
);
1415 while (tmpa
== tmpb
&& ISDIGIT (tmpa
))
1416 tmpa
= UCHAR (*++a
), tmpb
= UCHAR (*++b
);
1418 if ((tmpa
== decimal_point
&& !ISDIGIT (tmpb
))
1419 || (tmpb
== decimal_point
&& !ISDIGIT (tmpa
)))
1420 return fraccompare (a
, b
);
1423 for (loga
= 1; ISDIGIT (UCHAR (*++a
)); ++loga
)
1429 for (logb
= 1; ISDIGIT (UCHAR (*++b
)); ++logb
)
1434 if ((tmp
= loga
- logb
) != 0)
1446 general_numcompare (const char *sa
, const char *sb
)
1449 /* FIXME: add option to warn about failed conversions. */
1450 /* FIXME: maybe add option to try expensive FP conversion
1451 only if A and B can't be compared more cheaply/accurately. */
1452 if (xstrtod (sa
, NULL
, &a
))
1456 if (xstrtod (sb
, NULL
, &b
))
1460 return a
== b
? 0 : a
< b
? -1 : 1;
1463 /* Return an integer in 1..12 of the month name S with length LEN.
1464 Return 0 if the name in S is not recognized. */
1467 getmonth (const char *s
, int len
)
1470 register int i
, lo
= 0, hi
= 12, result
;
1472 while (len
> 0 && blanks
[UCHAR (*s
)])
1481 month
= (char *) alloca (len
+ 1);
1482 for (i
= 0; i
< len
; ++i
)
1483 month
[i
] = fold_toupper
[UCHAR (s
[i
])];
1484 while (blanks
[UCHAR (month
[i
- 1])])
1490 int ix
= (lo
+ hi
) / 2;
1492 len
= strlen (monthtab
[ix
].name
);
1493 if (NLS_STRNCMP (month
, monthtab
[ix
].name
, len
) < 0)
1498 while (hi
- lo
> 1);
1500 result
= (!strncmp (month
, monthtab
[lo
].name
, len
) ? monthtab
[lo
].val
: 0);
1506 /* Look for the month in locale table, and if that fails try with
1507 us month name table */
1509 nls_month_is_either_locale (const char *s
, int len
)
1513 monthtab
= nls_monthtab
;
1514 ind
= getmonth (s
, len
);
1517 monthtab
= us_monthtab
;
1518 ind
= getmonth (s
, len
);
1524 /* Compare two lines A and B trying every key in sequence until there
1525 are no more keys or a difference is found. */
1528 keycompare (const struct line
*a
, const struct line
*b
)
1530 register char *texta
, *textb
, *lima
, *limb
;
1531 register unsigned char *translate
;
1532 register int *ignore
;
1533 struct keyfield
*key
;
1534 int diff
= 0, iter
= 0, lena
, lenb
;
1536 for (key
= keyhead
.next
; key
; key
= key
->next
, ++iter
)
1538 int comparable_lengths
= 1;
1540 ignore
= key
->ignore
;
1541 translate
= (unsigned char *) key
->translate
;
1543 /* Find the beginning and limit of each field. */
1544 if (iter
|| a
->keybeg
== NULL
|| b
->keybeg
== NULL
)
1546 if (key
->eword
>= 0)
1547 lima
= limfield (a
, key
), limb
= limfield (b
, key
);
1549 lima
= a
->text
+ a
->length
, limb
= b
->text
+ b
->length
;
1551 if (key
->sword
>= 0)
1552 texta
= begfield (a
, key
), textb
= begfield (b
, key
);
1555 texta
= a
->text
, textb
= b
->text
;
1556 if (key
->skipsblanks
)
1558 while (texta
< lima
&& blanks
[UCHAR (*texta
)])
1560 while (textb
< limb
&& blanks
[UCHAR (*textb
)])
1567 /* For the first iteration only, the key positions have
1568 been precomputed for us. */
1569 texta
= a
->keybeg
, lima
= a
->keylim
;
1570 textb
= b
->keybeg
, limb
= b
->keylim
;
1573 /* Find the lengths. */
1574 lena
= lima
- texta
, lenb
= limb
- textb
;
1580 if (key
->skipeblanks
)
1582 char *a_end
= texta
+ lena
;
1583 char *b_end
= textb
+ lenb
;
1584 trim_trailing_blanks (texta
, &a_end
);
1585 trim_trailing_blanks (textb
, &b_end
);
1586 lena
= a_end
- texta
;
1587 lenb
= b_end
- textb
;
1590 /* Actually compare the fields. */
1595 char savea
= *lima
, saveb
= *limb
;
1597 *lima
= *limb
= '\0';
1598 diff
= numcompare (texta
, textb
);
1599 *lima
= savea
, *limb
= saveb
;
1602 diff
= numcompare (texta
, textb
);
1605 return key
->reverse
? -diff
: diff
;
1608 else if (key
->general_numeric
)
1612 char savea
= *lima
, saveb
= *limb
;
1614 *lima
= *limb
= '\0';
1615 diff
= general_numcompare (texta
, textb
);
1616 *lima
= savea
, *limb
= saveb
;
1619 diff
= general_numcompare (texta
, textb
);
1622 return key
->reverse
? -diff
: diff
;
1625 else if (key
->month
)
1629 /* if we haven't decided which locale to go with, we get the
1630 month name from either. If either month name is fully
1631 solved and the month name doesn't collide with the other
1632 locale... then use that table from there forward */
1633 if (!nls_month_found
)
1637 x
= nls_month_is_either_locale (texta
, lena
);
1638 nls_month_found
= !nls_months_collide
[x
];
1639 if (nls_month_found
)
1641 diff
= x
- getmonth (textb
, lenb
);
1645 diff
= nls_month_is_either_locale (textb
, lenb
);
1646 nls_month_found
= !nls_months_collide
[diff
];
1652 diff
= getmonth (texta
, lena
) - getmonth (textb
, lenb
);
1654 return key
->reverse
? -diff
: diff
;
1659 /* Sorting like this may become slow, so in a simple locale the user
1660 can select a faster sort that is similar to ascii sort */
1661 else if (need_locale
)
1663 /* FIXME: consider making parameters non-const, then when
1664 both ignore and translate are NULL (which should be most
1665 of the time) we could temporarily NUL-terminate them in
1666 place and avoid the copy. */
1668 char *copy_a
= (char *) alloca (lena
+ 1);
1669 char *copy_b
= (char *) alloca (lenb
+ 1);
1670 int new_len_a
, new_len_b
, i
;
1672 /* We can't use strcoll directly on the two strings,
1673 but rather must extract the text for the key
1674 (to NUL-terminate for strcoll) and handle any
1675 'ignore' and/or 'translate' before comparing. */
1676 for (new_len_a
= new_len_b
= i
= 0; i
< max (lena
, lenb
); i
++)
1680 copy_a
[new_len_a
] = (translate
1681 ? translate
[UCHAR (texta
[i
])]
1683 if (!ignore
|| !ignore
[UCHAR (texta
[i
])])
1688 copy_b
[new_len_b
] = (translate
1689 ? translate
[UCHAR (textb
[i
])]
1691 if (!ignore
|| !ignore
[UCHAR (textb
[i
])])
1696 copy_a
[new_len_a
] = copy_b
[new_len_b
] = 0;
1698 diff
= strcoll (copy_a
, copy_b
);
1700 /* Free copy_a and copy_b. */
1704 return key
->reverse
? -diff
: diff
;
1709 else if (ignore
&& translate
)
1711 #define CMP_WITH_IGNORE(A, B) \
1714 while (texta < lima && textb < limb) \
1716 while (texta < lima && ignore[UCHAR (*texta)]) \
1718 while (textb < limb && ignore[UCHAR (*textb)]) \
1720 if (texta < lima && textb < limb) \
1724 diff = UCHAR (A) - UCHAR (B); \
1731 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1733 else if (texta < lima && textb == limb \
1734 && !ignore[UCHAR (*texta)]) \
1740 while (texta < lima && ignore[UCHAR (*texta)]) \
1742 while (textb < limb && ignore[UCHAR (*textb)]) \
1745 if (texta == lima && textb < limb) \
1747 else if (texta < lima && textb == limb) \
1750 /* Relative lengths are meaningless if characters were ignored. \
1751 Handling this case here avoids what might be an invalid length \
1752 comparison below. */ \
1753 if (diff == 0 && texta == lima && textb == limb) \
1754 comparable_lengths = 0; \
1758 CMP_WITH_IGNORE (translate
[UCHAR (*texta
)], translate
[UCHAR (*textb
)]);
1760 CMP_WITH_IGNORE (UCHAR (*texta
), UCHAR (*textb
));
1762 while (texta
< lima
&& textb
< limb
)
1764 if (translate
[UCHAR (*texta
++)] != translate
[UCHAR (*textb
++)])
1766 diff
= (UCHAR (translate
[UCHAR (*--texta
)])
1767 - UCHAR (translate
[UCHAR (*--textb
)]));
1773 diff
= NLS_MEMCMP (texta
, textb
, min (lena
, lenb
));
1777 return key
->reverse
? -diff
: diff
;
1778 if (comparable_lengths
&& (diff
= lena
- lenb
) != 0)
1779 return key
->reverse
? -diff
: diff
;
1785 /* Compare two lines A and B, returning negative, zero, or positive
1786 depending on whether A compares less than, equal to, or greater than B. */
1789 compare (register const struct line
*a
, register const struct line
*b
)
1791 int diff
, tmpa
, tmpb
, mini
;
1793 /* First try to compare on the specified keys (if any).
1794 The only two cases with no key at all are unadorned sort,
1795 and unadorned sort -r. */
1798 diff
= keycompare (a
, b
);
1801 if (unique
|| stable
)
1805 /* If the keys all compare equal (or no keys were specified)
1806 fall through to the default byte-by-byte comparison. */
1807 tmpa
= a
->length
, tmpb
= b
->length
;
1808 mini
= min (tmpa
, tmpb
);
1813 char *ap
= a
->text
, *bp
= b
->text
;
1816 if (need_locale
) /* want absolutely correct sorting */
1818 diff
= strcoll (ap
, bp
);
1819 return reverse
? -diff
: diff
;
1822 diff
= UCHAR (*ap
) - UCHAR (*bp
);
1825 diff
= NLS_MEMCMP (ap
, bp
, mini
);
1831 return reverse
? -diff
: diff
;
1834 /* Check that the lines read from the given FP come in order. Print a
1835 diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1836 the line number of the first out-of-order line (counting from 1) if they
1837 are not in order. Otherwise, print no diagnostic and return zero. */
1840 checkfp (FILE *fp
, const char *file_name
)
1842 struct buffer buf
; /* Input buffer. */
1843 struct lines lines
; /* Lines scanned from the buffer. */
1844 struct line temp
; /* Copy of previous line. */
1845 int cc
; /* Character count. */
1847 int line_number
= 1;
1848 struct line
*disorder_line
;
1849 int disorder_line_number
= 0;
1851 #ifdef lint /* Suppress `used before initialized' warning. */
1852 disorder_line
= NULL
;
1855 initbuf (&buf
, mergealloc
);
1856 initlines (&lines
, mergealloc
/ linelength
+ 1,
1857 LINEALLOC
/ ((NMERGE
+ NMERGE
) * sizeof (struct line
)));
1859 temp
.text
= xmalloc (alloc
);
1861 cc
= fillbuf (&buf
, fp
);
1865 findlines (&buf
, &lines
);
1869 struct line
*prev_line
; /* Pointer to previous line. */
1870 int cmp
; /* Result of calling compare. */
1873 /* Compare each line in the buffer with its successor. */
1874 for (i
= 0; i
< lines
.used
- 1; ++i
)
1876 cmp
= compare (&lines
.lines
[i
], &lines
.lines
[i
+ 1]);
1877 if ((unique
&& cmp
>= 0) || (cmp
> 0))
1879 disorder_line
= &lines
.lines
[i
+ 1];
1880 disorder_line_number
= line_number
+ i
+ 1;
1885 line_number
+= lines
.used
;
1887 /* Save the last line of the buffer and refill the buffer. */
1888 prev_line
= lines
.lines
+ (lines
.used
- 1);
1889 if (prev_line
->length
+ 1 > alloc
)
1895 while (alloc
< prev_line
->length
+ 1);
1896 temp
.text
= xrealloc (temp
.text
, alloc
);
1898 assert (prev_line
->length
+ 1 <= alloc
);
1899 memcpy (temp
.text
, prev_line
->text
, prev_line
->length
+ 1);
1900 temp
.length
= prev_line
->length
;
1901 temp
.keybeg
= temp
.text
+ (prev_line
->keybeg
- prev_line
->text
);
1902 temp
.keylim
= temp
.text
+ (prev_line
->keylim
- prev_line
->text
);
1904 cc
= fillbuf (&buf
, fp
);
1908 findlines (&buf
, &lines
);
1909 /* Make sure the line saved from the old buffer contents is
1910 less than or equal to the first line of the new buffer. */
1911 cmp
= compare (&temp
, &lines
.lines
[0]);
1912 if ((unique
&& cmp
>= 0) || (cmp
> 0))
1914 disorder_line
= &lines
.lines
[0];
1915 disorder_line_number
= line_number
;
1923 if (disorder_line_number
)
1925 fprintf (stderr
, _("%s: %s:%d: disorder: "), program_name
, file_name
,
1926 disorder_line_number
);
1927 write_bytes (disorder_line
->text
, disorder_line
->length
, stderr
);
1928 putc (eolchar
, stderr
);
1932 free ((char *) lines
.lines
);
1934 return disorder_line_number
;
1937 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1938 Close FPS before returning. */
1941 mergefps (FILE **fps
, register int nfps
, FILE *ofp
)
1943 struct buffer buffer
[NMERGE
]; /* Input buffers for each file. */
1944 struct lines lines
[NMERGE
]; /* Line tables for each buffer. */
1945 struct line saved
; /* Saved line for unique check. */
1946 int savedflag
= 0; /* True if there is a saved line. */
1947 int savealloc
; /* Size allocated for the saved line. */
1948 int cur
[NMERGE
]; /* Current line in each line table. */
1949 int ord
[NMERGE
]; /* Table representing a permutation of fps,
1950 such that lines[ord[0]].lines[cur[ord[0]]]
1951 is the smallest line and will be next
1953 register int i
, j
, t
;
1955 #ifdef lint /* Suppress `used before initialized' warning. */
1959 /* Allocate space for a saved line if necessary. */
1962 savealloc
= linelength
;
1963 saved
.text
= xmalloc (savealloc
);
1966 /* Read initial lines from each input file. */
1967 for (i
= 0; i
< nfps
; ++i
)
1969 initbuf (&buffer
[i
], mergealloc
);
1970 /* If a file is empty, eliminate it from future consideration. */
1971 while (i
< nfps
&& !fillbuf (&buffer
[i
], fps
[i
]))
1975 for (j
= i
; j
< nfps
; ++j
)
1976 fps
[j
] = fps
[j
+ 1];
1979 free (buffer
[i
].buf
);
1982 initlines (&lines
[i
], mergealloc
/ linelength
+ 1,
1983 LINEALLOC
/ ((NMERGE
+ NMERGE
) * sizeof (struct line
)));
1984 findlines (&buffer
[i
], &lines
[i
]);
1989 /* Set up the ord table according to comparisons among input lines.
1990 Since this only reorders two items if one is strictly greater than
1991 the other, it is stable. */
1992 for (i
= 0; i
< nfps
; ++i
)
1994 for (i
= 1; i
< nfps
; ++i
)
1995 if (compare (&lines
[ord
[i
- 1]].lines
[cur
[ord
[i
- 1]]],
1996 &lines
[ord
[i
]].lines
[cur
[ord
[i
]]]) > 0)
1997 t
= ord
[i
- 1], ord
[i
- 1] = ord
[i
], ord
[i
] = t
, i
= 0;
1999 /* Repeatedly output the smallest line until no input remains. */
2002 /* If uniqified output is turned on, output only the first of
2003 an identical series of lines. */
2006 if (savedflag
&& compare (&saved
, &lines
[ord
[0]].lines
[cur
[ord
[0]]]))
2008 write_bytes (saved
.text
, saved
.length
, ofp
);
2009 putc (eolchar
, ofp
);
2014 if (savealloc
< lines
[ord
[0]].lines
[cur
[ord
[0]]].length
+ 1)
2016 while (savealloc
< lines
[ord
[0]].lines
[cur
[ord
[0]]].length
+ 1)
2018 saved
.text
= xrealloc (saved
.text
, savealloc
);
2020 saved
.length
= lines
[ord
[0]].lines
[cur
[ord
[0]]].length
;
2021 memcpy (saved
.text
, lines
[ord
[0]].lines
[cur
[ord
[0]]].text
,
2023 if (lines
[ord
[0]].lines
[cur
[ord
[0]]].keybeg
!= NULL
)
2025 saved
.keybeg
= saved
.text
+
2026 (lines
[ord
[0]].lines
[cur
[ord
[0]]].keybeg
2027 - lines
[ord
[0]].lines
[cur
[ord
[0]]].text
);
2029 if (lines
[ord
[0]].lines
[cur
[ord
[0]]].keylim
!= NULL
)
2031 saved
.keylim
= saved
.text
+
2032 (lines
[ord
[0]].lines
[cur
[ord
[0]]].keylim
2033 - lines
[ord
[0]].lines
[cur
[ord
[0]]].text
);
2040 write_bytes (lines
[ord
[0]].lines
[cur
[ord
[0]]].text
,
2041 lines
[ord
[0]].lines
[cur
[ord
[0]]].length
, ofp
);
2042 putc (eolchar
, ofp
);
2045 /* Check if we need to read more lines into core. */
2046 if (++cur
[ord
[0]] == lines
[ord
[0]].used
)
2048 if (fillbuf (&buffer
[ord
[0]], fps
[ord
[0]]))
2050 findlines (&buffer
[ord
[0]], &lines
[ord
[0]]);
2055 /* We reached EOF on fps[ord[0]]. */
2056 for (i
= 1; i
< nfps
; ++i
)
2057 if (ord
[i
] > ord
[0])
2060 xfclose (fps
[ord
[0]]);
2061 free (buffer
[ord
[0]].buf
);
2062 free ((char *) lines
[ord
[0]].lines
);
2063 for (i
= ord
[0]; i
< nfps
; ++i
)
2065 fps
[i
] = fps
[i
+ 1];
2066 buffer
[i
] = buffer
[i
+ 1];
2067 lines
[i
] = lines
[i
+ 1];
2068 cur
[i
] = cur
[i
+ 1];
2070 for (i
= 0; i
< nfps
; ++i
)
2071 ord
[i
] = ord
[i
+ 1];
2076 /* The new line just read in may be larger than other lines
2077 already in core; push it back in the queue until we encounter
2078 a line larger than it. */
2079 for (i
= 1; i
< nfps
; ++i
)
2081 t
= compare (&lines
[ord
[0]].lines
[cur
[ord
[0]]],
2082 &lines
[ord
[i
]].lines
[cur
[ord
[i
]]]);
2084 t
= ord
[0] - ord
[i
];
2089 for (j
= 1; j
< i
; ++j
)
2090 ord
[j
- 1] = ord
[j
];
2094 if (unique
&& savedflag
)
2096 write_bytes (saved
.text
, saved
.length
, ofp
);
2097 putc (eolchar
, ofp
);
2104 /* Find the numeric format that this file represents to us for sorting. */
2106 nls_numeric_format (const struct line
*line
, int nlines
)
2108 struct nls_keyfield
*n_key
= nls_keyhead
;
2110 /* line = first line, nlines = number of lines,
2111 nls_fraction_found = false */
2112 for (; !nls_fraction_found
&& nlines
> 0; line
++, nlines
--)
2115 for (iter
= 0; !nls_fraction_found
; iter
++)
2119 struct keyfield
*key
= n_key
->key
;
2121 /* text = {}, lim = {}, key = first key */
2122 if (iter
|| line
->keybeg
== NULL
)
2124 /* Succeding keys, where the key field is
2126 if (key
->eword
>= 0) /* key->eword = length of key */
2127 lim
= limfield (line
, key
);
2129 lim
= line
->text
+ line
->length
;
2130 /* lim = end of key field */
2132 if (key
->sword
>= 0) /* key->sword = start of key */
2133 text
= begfield (line
, key
);
2136 /* text = start of field */
2140 /* First key is always the whole line */
2141 text
= line
->keybeg
;
2144 /* text = start of text to sort
2145 lim = end of text to sort */
2147 look_for_fraction (text
, lim
);
2149 /* nls_fraction_found = decimal_point found? */
2151 if ((n_key
= n_key
->next
) == nls_keyhead
)
2152 break; /* No more keys for this line */
2155 nls_fraction_found
= 1;
2156 /* decide on current decimal_point known */
2161 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
2164 sortlines (struct line
*lines
, int nlines
, struct line
*temp
)
2166 register struct line
*lo
, *hi
, *t
;
2167 register int nlo
, nhi
;
2171 if (compare (&lines
[0], &lines
[1]) > 0)
2174 lines
[0] = lines
[1];
2186 sortlines (lo
, nlo
, temp
);
2189 sortlines (hi
, nhi
, temp
);
2194 if (compare (lo
, hi
) <= 0)
2195 *t
++ = *lo
++, --nlo
;
2197 *t
++ = *hi
++, --nhi
;
2201 for (lo
= lines
, nlo
= nlines
- nhi
, t
= temp
; nlo
; --nlo
)
2205 /* Check that each of the NFILES FILES is ordered.
2206 Return a count of disordered files. */
2209 check (char **files
, int nfiles
)
2211 int i
, disorders
= 0;
2214 for (i
= 0; i
< nfiles
; ++i
)
2216 fp
= xfopen (files
[i
], "r");
2217 if (checkfp (fp
, files
[i
]))
2225 /* Merge NFILES FILES onto OFP. */
2228 merge (char **files
, int nfiles
, FILE *ofp
)
2232 FILE *fps
[NMERGE
], *tfp
;
2234 while (nfiles
> NMERGE
)
2237 for (i
= 0; i
< nfiles
/ NMERGE
; ++i
)
2239 for (j
= 0; j
< NMERGE
; ++j
)
2240 fps
[j
] = xfopen (files
[i
* NMERGE
+ j
], "r");
2241 tfp
= xtmpfopen (temp
= tempname ());
2242 mergefps (fps
, NMERGE
, tfp
);
2244 for (j
= 0; j
< NMERGE
; ++j
)
2245 zaptemp (files
[i
* NMERGE
+ j
]);
2248 for (j
= 0; j
< nfiles
% NMERGE
; ++j
)
2249 fps
[j
] = xfopen (files
[i
* NMERGE
+ j
], "r");
2250 tfp
= xtmpfopen (temp
= tempname ());
2251 mergefps (fps
, nfiles
% NMERGE
, tfp
);
2253 for (j
= 0; j
< nfiles
% NMERGE
; ++j
)
2254 zaptemp (files
[i
* NMERGE
+ j
]);
2259 for (i
= 0; i
< nfiles
; ++i
)
2260 fps
[i
] = xfopen (files
[i
], "r");
2261 mergefps (fps
, i
, ofp
);
2262 for (i
= 0; i
< nfiles
; ++i
)
2266 /* Sort NFILES FILES onto OFP. */
2269 sort (char **files
, int nfiles
, FILE *ofp
)
2276 struct tempnode
*node
;
2277 int n_temp_files
= 0;
2280 initbuf (&buf
, sortalloc
);
2281 initlines (&lines
, sortalloc
/ linelength
+ 1,
2282 LINEALLOC
/ sizeof (struct line
));
2284 tmp
= (struct line
*) xmalloc (ntmp
* sizeof (struct line
));
2288 fp
= xfopen (*files
++, "r");
2289 while (fillbuf (&buf
, fp
))
2291 findlines (&buf
, &lines
);
2292 if (lines
.used
> ntmp
)
2294 while (lines
.used
> ntmp
)
2296 tmp
= (struct line
*)
2297 xrealloc ((char *) tmp
, ntmp
* sizeof (struct line
));
2301 nls_keyhead
= nls_keyhead
->next
;
2302 if (!nls_fraction_found
&& nls_keyhead
)
2303 nls_numeric_format (lines
.lines
, lines
.used
);
2305 sortlines (lines
.lines
, lines
.used
, tmp
);
2306 if (feof (fp
) && !nfiles
&& !n_temp_files
&& !buf
.left
)
2313 tfp
= xtmpfopen (tempname ());
2315 for (i
= 0; i
< lines
.used
; ++i
)
2316 if (!unique
|| i
== 0
2317 || compare (&lines
.lines
[i
], &lines
.lines
[i
- 1]))
2319 write_bytes (lines
.lines
[i
].text
, lines
.lines
[i
].length
, tfp
);
2320 putc (eolchar
, tfp
);
2329 free ((char *) lines
.lines
);
2330 free ((char *) tmp
);
2334 tempfiles
= (char **) xmalloc (n_temp_files
* sizeof (char *));
2336 for (node
= temphead
.next
; i
> 0; node
= node
->next
)
2337 tempfiles
[--i
] = node
->name
;
2338 merge (tempfiles
, n_temp_files
, ofp
);
2339 free ((char *) tempfiles
);
2343 /* Insert key KEY at the end of the list (`keyhead'). */
2346 insertkey (struct keyfield
*key
)
2348 struct keyfield
*k
= &keyhead
;
2355 if (key
->numeric
|| key
->general_numeric
)
2357 struct nls_keyfield
*nk
;
2359 nk
= (struct nls_keyfield
*) xmalloc (sizeof (struct nls_keyfield
));
2363 nk
->next
= nls_keyhead
->next
;
2364 nls_keyhead
->next
= nk
;
2374 badfieldspec (const char *s
)
2376 error (SORT_FAILURE
, 0, _("invalid field specification `%s'"), s
);
2379 /* Handle interrupts and hangups. */
2382 sighandler (int sig
)
2385 struct sigaction sigact
;
2387 sigact
.sa_handler
= SIG_DFL
;
2388 sigemptyset (&sigact
.sa_mask
);
2389 sigact
.sa_flags
= 0;
2390 sigaction (sig
, &sigact
, NULL
);
2391 #else /* !SA_INTERRUPT */
2392 signal (sig
, SIG_DFL
);
2393 #endif /* SA_INTERRUPT */
2395 kill (getpid (), sig
);
2398 /* Set the ordering options for KEY specified in S.
2399 Return the address of the first character in S that
2400 is not a valid ordering option.
2401 BLANKTYPE is the kind of blanks that 'b' should skip. */
2404 set_ordering (register const char *s
, struct keyfield
*key
,
2405 enum blanktype blanktype
)
2412 if (blanktype
== bl_start
|| blanktype
== bl_both
)
2413 key
->skipsblanks
= 1;
2414 if (blanktype
== bl_end
|| blanktype
== bl_both
)
2415 key
->skipeblanks
= 1;
2418 key
->ignore
= nondictionary
;
2421 key
->translate
= fold_toupper
;
2424 key
->general_numeric
= 1;
2427 key
->ignore
= nonprinting
;
2447 key_init (struct keyfield
*key
)
2449 memset (key
, 0, sizeof (*key
));
2453 /* strdup and return the result of setlocale, but guard against a NULL
2454 return value. If setlocale returns NULL, strdup FAIL_VAL instead. */
2456 #if defined ENABLE_NLS && ( !defined __GLIBC__ || __GLIBC__ < 2 )
2457 static inline char *
2458 my_setlocale (const char *locale
, const char *fail_val
)
2460 char *s
= setlocale (LC_ALL
, locale
);
2462 s
= (char *) fail_val
;
2468 main (int argc
, char **argv
)
2470 struct keyfield
*key
= NULL
, gkey
;
2473 int checkonly
= 0, mergeonly
= 0, nfiles
= 0;
2474 char *minus
= "-", *outfile
= minus
, **files
, *tmp
;
2477 struct sigaction oldact
, newact
;
2478 #endif /* SA_INTERRUPT */
2480 program_name
= argv
[0];
2484 /* Determine whether the current locale is C or POSIX. */
2485 # if defined __GLIBC__ && __GLIBC__ >= 2
2486 s
= setlocale (LC_ALL
, "");
2487 if (s
!= NULL
&& !STREQ (s
, "C") && !STREQ (s
, "POSIX"))
2488 /* The current locale is neither C nor POSIX. We'll need to do
2493 char *c_locale_string
= my_setlocale ("C", "");
2494 char *posix_locale_string
= my_setlocale ("POSIX", "");
2495 char *current_locale_string
= setlocale (LC_ALL
, "");
2497 if (current_locale_string
!= NULL
2498 && !STREQ (current_locale_string
, c_locale_string
)
2499 && !STREQ (current_locale_string
, posix_locale_string
))
2500 /* The current locale is neither C nor POSIX.
2501 We'll need to do more work. */
2504 free (c_locale_string
);
2505 free (posix_locale_string
);
2509 /* Let's get locale's representation of the decimal point */
2511 struct lconv
*lconvp
= localeconv ();
2513 decimal_point
= *lconvp
->decimal_point
;
2514 th_sep
= *lconvp
->thousands_sep
;
2515 nls_grouping
= (char *) (lconvp
->grouping
);
2518 /* if locale doesn't define a decimal point, we'll use the
2520 if (decimal_point
== '\0')
2521 decimal_point
= FLOATING_POINT
;
2523 nls_fraction_found
= 0; /* Figure out which decimal point to use */
2525 nls_month_found
= 0; /* Figure out which month notation to use */
2527 monthtab
= nls_monthtab
;
2531 bindtextdomain (PACKAGE
, LOCALEDIR
);
2532 textdomain (PACKAGE
);
2534 parse_long_options (argc
, argv
, PROGRAM_NAME
, GNU_PACKAGE
, VERSION
,
2537 have_read_stdin
= 0;
2540 temp_dir
= getenv ("TMPDIR");
2541 if (temp_dir
== NULL
)
2542 temp_dir
= DEFAULT_TMPDIR
;
2544 /* Change the way xmalloc and xrealloc fail. */
2545 xalloc_exit_failure
= SORT_FAILURE
;
2546 xalloc_fail_func
= cleanup
;
2549 newact
.sa_handler
= sighandler
;
2550 sigemptyset (&newact
.sa_mask
);
2551 newact
.sa_flags
= 0;
2553 sigaction (SIGINT
, NULL
, &oldact
);
2554 if (oldact
.sa_handler
!= SIG_IGN
)
2555 sigaction (SIGINT
, &newact
, NULL
);
2556 sigaction (SIGHUP
, NULL
, &oldact
);
2557 if (oldact
.sa_handler
!= SIG_IGN
)
2558 sigaction (SIGHUP
, &newact
, NULL
);
2559 sigaction (SIGPIPE
, NULL
, &oldact
);
2560 if (oldact
.sa_handler
!= SIG_IGN
)
2561 sigaction (SIGPIPE
, &newact
, NULL
);
2562 sigaction (SIGTERM
, NULL
, &oldact
);
2563 if (oldact
.sa_handler
!= SIG_IGN
)
2564 sigaction (SIGTERM
, &newact
, NULL
);
2565 #else /* !SA_INTERRUPT */
2566 if (signal (SIGINT
, SIG_IGN
) != SIG_IGN
)
2567 signal (SIGINT
, sighandler
);
2568 if (signal (SIGHUP
, SIG_IGN
) != SIG_IGN
)
2569 signal (SIGHUP
, sighandler
);
2570 if (signal (SIGPIPE
, SIG_IGN
) != SIG_IGN
)
2571 signal (SIGPIPE
, sighandler
);
2572 if (signal (SIGTERM
, SIG_IGN
) != SIG_IGN
)
2573 signal (SIGTERM
, sighandler
);
2574 #endif /* !SA_INTERRUPT */
2576 gkey
.sword
= gkey
.eword
= -1;
2578 gkey
.translate
= NULL
;
2579 gkey
.numeric
= gkey
.general_numeric
= gkey
.month
= gkey
.reverse
= 0;
2580 gkey
.skipsblanks
= gkey
.skipeblanks
= 0;
2582 files
= (char **) xmalloc (sizeof (char *) * argc
);
2584 for (i
= 1; i
< argc
; ++i
)
2586 if (argv
[i
][0] == '+')
2590 key
= (struct keyfield
*) xmalloc (sizeof (struct keyfield
));
2593 if (! (ISDIGIT (*s
) || (*s
== '.' && ISDIGIT (s
[1]))))
2594 badfieldspec (argv
[i
]);
2595 for (t
= 0; ISDIGIT (*s
); ++s
)
2596 t
= 10 * t
+ *s
- '0';
2599 for (++s
; ISDIGIT (*s
); ++s
)
2600 t2
= 10 * t2
+ *s
- '0';
2608 s
= set_ordering (s
, key
, bl_start
);
2610 badfieldspec (argv
[i
]);
2612 else if (argv
[i
][0] == '-' && argv
[i
][1])
2615 if (ISDIGIT (*s
) || (*s
== '.' && ISDIGIT (s
[1])))
2619 /* Provoke with `sort -9'. */
2620 error (0, 0, _("when using the old-style +POS and -POS \
2621 key specifiers,\nthe +POS specifier must come first"));
2622 usage (SORT_FAILURE
);
2624 for (t
= 0; ISDIGIT (*s
); ++s
)
2625 t
= t
* 10 + *s
- '0';
2628 for (++s
; ISDIGIT (*s
); ++s
)
2629 t2
= t2
* 10 + *s
- '0';
2632 s
= set_ordering (s
, key
, bl_end
);
2634 badfieldspec (argv
[i
]);
2641 s
= set_ordering (s
, &gkey
, bl_both
);
2655 error (SORT_FAILURE
, 0,
2656 _("option `-k' requires an argument"));
2662 key
= (struct keyfield
*)
2663 xmalloc (sizeof (struct keyfield
));
2667 badfieldspec (argv
[i
]);
2668 for (t
= 0; ISDIGIT (*s
); ++s
)
2669 t
= 10 * t
+ *s
- '0';
2672 /* Provoke with `sort -k0' */
2673 error (0, 0, _("the starting field number argument \
2674 to the `-k' option must be positive"));
2675 badfieldspec (argv
[i
]);
2681 if (!ISDIGIT (s
[1]))
2683 /* Provoke with `sort -k1.' */
2684 error (0, 0, _("starting field spec has `.' but \
2685 lacks following character offset"));
2686 badfieldspec (argv
[i
]);
2688 for (++s
; ISDIGIT (*s
); ++s
)
2689 t2
= 10 * t2
+ *s
- '0';
2692 /* Provoke with `sort -k1.0' */
2693 error (0, 0, _("starting field character offset \
2694 argument to the `-k' option\nmust be positive"));
2695 badfieldspec (argv
[i
]);
2706 s
= set_ordering (s
, key
, bl_start
);
2713 badfieldspec (argv
[i
]);
2716 /* Skip over comma. */
2720 /* Provoke with `sort -k1,' */
2721 error (0, 0, _("field specification has `,' but \
2722 lacks following field spec"));
2723 badfieldspec (argv
[i
]);
2726 for (t
= 0; ISDIGIT (*s
); ++s
)
2727 t
= t
* 10 + *s
- '0';
2730 /* Provoke with `sort -k1,0' */
2731 error (0, 0, _("ending field number argument \
2732 to the `-k' option must be positive"));
2733 badfieldspec (argv
[i
]);
2739 if (!ISDIGIT (s
[1]))
2741 /* Provoke with `sort -k1,1.' */
2742 error (0, 0, _("ending field spec has `.' \
2743 but lacks following character offset"));
2744 badfieldspec (argv
[i
]);
2746 for (++s
; ISDIGIT (*s
); ++s
)
2747 t2
= t2
* 10 + *s
- '0';
2751 /* `-k 2,3' is equivalent to `+1 -3'. */
2756 s
= set_ordering (s
, key
, bl_end
);
2758 badfieldspec (argv
[i
]);
2772 error (SORT_FAILURE
, 0,
2773 _("option `-o' requires an argument"));
2775 outfile
= argv
[++i
];
2784 else if (i
< argc
- 1)
2790 error (SORT_FAILURE
, 0,
2791 _("option `-t' requires an argument"));
2799 temp_dir
= argv
[++i
];
2801 error (SORT_FAILURE
, 0,
2802 _("option `-T' requires an argument"));
2813 /* Accept and ignore e.g. -y0 for compatibility with
2817 fprintf (stderr
, _("%s: unrecognized option `-%c'\n"),
2819 usage (SORT_FAILURE
);
2825 else /* Not an option. */
2827 files
[nfiles
++] = argv
[i
];
2835 /* Inheritance of global options to individual keys. */
2836 for (key
= keyhead
.next
; key
; key
= key
->next
)
2837 if (!key
->ignore
&& !key
->translate
&& !key
->skipsblanks
&& !key
->reverse
2838 && !key
->skipeblanks
&& !key
->month
&& !key
->numeric
2839 && !key
->general_numeric
)
2841 key
->ignore
= gkey
.ignore
;
2842 key
->translate
= gkey
.translate
;
2843 key
->skipsblanks
= gkey
.skipsblanks
;
2844 key
->skipeblanks
= gkey
.skipeblanks
;
2845 key
->month
= gkey
.month
;
2846 key
->numeric
= gkey
.numeric
;
2847 key
->general_numeric
= gkey
.general_numeric
;
2848 key
->reverse
= gkey
.reverse
;
2851 if (!keyhead
.next
&& (gkey
.ignore
|| gkey
.translate
|| gkey
.skipsblanks
2852 || gkey
.skipeblanks
|| gkey
.month
|| gkey
.numeric
2853 || gkey
.general_numeric
))
2855 reverse
= gkey
.reverse
;
2865 /* POSIX requires that sort return 1 IFF invoked with -c and the
2866 input is not properly sorted. */
2867 exit (check (files
, nfiles
) == 0 ? 0 : 1);
2870 if (!STREQ (outfile
, "-"))
2872 struct stat outstat
;
2873 if (stat (outfile
, &outstat
) == 0)
2875 /* The following code prevents a race condition when
2876 people use the brain dead shell programming idiom:
2877 cat file | sort -o file
2878 This feature is provided for historical compatibility,
2879 but we strongly discourage ever relying on this in
2880 new shell programs. */
2882 /* Temporarily copy each input file that might be another name
2883 for the output file. When in doubt (e.g. a pipe), copy. */
2884 for (i
= 0; i
< nfiles
; ++i
)
2891 if (S_ISREG (outstat
.st_mode
) && !STREQ (outfile
, files
[i
]))
2894 if ((STREQ (files
[i
], "-")
2895 ? fstat (STDIN_FILENO
, &instat
)
2896 : stat (files
[i
], &instat
)) != 0)
2898 error (0, errno
, "%s", files
[i
]);
2900 exit (SORT_FAILURE
);
2902 if (S_ISREG (instat
.st_mode
) && !SAME_INODE (instat
, outstat
))
2904 /* We know the files are distinct. */
2909 in_fp
= xfopen (files
[i
], "r");
2911 out_fp
= xtmpfopen (tmp
);
2912 /* FIXME: maybe use copy.c(copy) here. */
2913 while ((cc
= fread (buf
, 1, sizeof buf
, in_fp
)) > 0)
2914 write_bytes (buf
, cc
, out_fp
);
2917 error (0, errno
, "%s", files
[i
]);
2919 exit (SORT_FAILURE
);
2925 ofp
= xfopen (outfile
, "w");
2929 /* A non-`-' outfile was specified, but the file doesn't yet exist.
2930 Before opening it for writing (thus creating it), make sure all
2931 of the input files exist. Otherwise, creating the output file
2932 could create an otherwise missing input file, making sort succeed
2933 when it should fail. */
2934 for (i
= 0; i
< nfiles
; ++i
)
2937 if (STREQ (files
[i
], "-"))
2939 if (stat (files
[i
], &sb
))
2941 error (0, errno
, "%s", files
[i
]);
2943 exit (SORT_FAILURE
);
2947 ofp
= xfopen (outfile
, "w");
2956 merge (files
, nfiles
, ofp
);
2958 sort (files
, nfiles
, ofp
);
2961 /* If we wait for the implicit flush on exit, and the parent process
2962 has closed stdout (e.g., exec >&- in a shell), then the output file
2963 winds up empty. I don't understand why. This is under SunOS,
2964 Solaris, Ultrix, and Irix. This premature fflush makes the output
2965 reappear. --karl@cs.umb.edu */
2966 if (fflush (ofp
) < 0)
2967 error (SORT_FAILURE
, errno
, _("%s: write error"), outfile
);
2969 if (have_read_stdin
&& fclose (stdin
) == EOF
)
2970 error (SORT_FAILURE
, errno
, "%s", outfile
);
2971 if (ferror (stdout
) || fclose (stdout
) == EOF
)
2972 error (SORT_FAILURE
, errno
, _("%s: write error"), outfile
);
2974 exit (EXIT_SUCCESS
);