1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991, 1992, 1993, 1994, 1995 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
24 /* Get isblank from GNU libc. */
27 #include <sys/types.h>
32 #include "long-options.h"
50 /* Undefine, to avoid warning about redefinition on some systems. */
52 #define min(a, b) ((a) < (b) ? (a) : (b))
54 #define UCHAR_LIM (UCHAR_MAX + 1)
55 #define UCHAR(c) ((unsigned char) (c))
57 #ifndef DEFAULT_TMPDIR
58 #define DEFAULT_TMPDIR "/tmp"
61 /* The kind of blanks for '-b' to skip in various options. */
62 enum blanktype
{ bl_start
, bl_end
, bl_both
};
64 /* The name this program was run with. */
67 /* Table of digits. */
68 static int digits
[UCHAR_LIM
];
70 /* Table of white space. */
71 static int blanks
[UCHAR_LIM
];
73 /* Table of non-printing characters. */
74 static int nonprinting
[UCHAR_LIM
];
76 /* Table of non-dictionary characters (not letters, digits, or blanks). */
77 static int nondictionary
[UCHAR_LIM
];
79 /* Translation table folding lower case to upper. */
80 static char fold_toupper
[UCHAR_LIM
];
82 /* Table mapping 3-letter month names to integers.
83 Alphabetic order allows binary search. */
104 /* During the merge phase, the number of files to merge at once. */
107 /* Initial buffer size for in core sorting. Will not grow unless a
108 line longer than this is seen. */
109 static int sortalloc
= 512 * 1024;
111 /* Initial buffer size for in core merge buffers. Bear in mind that
112 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
113 static int mergealloc
= 16 * 1024;
115 /* Guess of average line length. */
116 static int linelength
= 30;
118 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
119 #define LINEALLOC (256 * 1024)
121 /* Prefix for temporary file names. */
122 static char *temp_file_prefix
;
124 /* Flag to reverse the order of all comparisons. */
127 /* Flag for stable sort. This turns off the last ditch bytewise
128 comparison of lines, and instead leaves lines in the same order
129 they were read if all keys compare equal. */
132 /* Tab character separating fields. If NUL, then fields are separated
133 by the empty string between a non-whitespace character and a whitespace
137 /* Flag to remove consecutive duplicate lines from the output.
138 Only the last of a sequence of equal lines will be output. */
141 /* Nonzero if any of the input files are the standard input. */
142 static int have_read_stdin
;
144 /* Lines are held in core as counted strings. */
147 char *text
; /* Text of the line. */
148 int length
; /* Length not including final newline. */
149 char *keybeg
; /* Start of first key. */
150 char *keylim
; /* Limit of first key. */
153 /* Arrays of lines. */
156 struct line
*lines
; /* Dynamically allocated array of lines. */
157 int used
; /* Number of slots used. */
158 int alloc
; /* Number of slots allocated. */
159 int limit
; /* Max number of slots to allocate. */
165 char *buf
; /* Dynamically allocated buffer. */
166 int used
; /* Number of bytes used. */
167 int alloc
; /* Number of bytes allocated. */
168 int left
; /* Number of bytes left after line parsing. */
171 /* Lists of key field comparisons to be tried. */
172 static struct keyfield
174 int sword
; /* Zero-origin 'word' to start at. */
175 int schar
; /* Additional characters to skip. */
176 int skipsblanks
; /* Skip leading white space at start. */
177 int eword
; /* Zero-origin first word after field. */
178 int echar
; /* Additional characters in field. */
179 int skipeblanks
; /* Skip trailing white space at finish. */
180 int *ignore
; /* Boolean array of characters to ignore. */
181 char *translate
; /* Translation applied to characters. */
182 int numeric
; /* Flag for numeric comparison. */
183 int month
; /* Flag for comparison by month name. */
184 int reverse
; /* Reverse the sense of comparison. */
185 struct keyfield
*next
; /* Next keyfield to try. */
188 /* The list of temporary files. */
189 static struct tempnode
192 struct tempnode
*next
;
195 /* Clean up any remaining temporary files. */
200 struct tempnode
*node
;
202 for (node
= temphead
.next
; node
; node
= node
->next
)
206 /* Allocate N bytes of memory dynamically, with error checking. */
217 error (0, 0, "virtual memory exhausted");
224 /* Change the size of an allocated block of memory P to N bytes,
226 If P is NULL, run xmalloc.
227 If N is 0, run free and return NULL. */
244 error (0, 0, "virtual memory exhausted");
255 FILE *fp
= strcmp (file
, "-") ? fopen (file
, how
) : stdin
;
259 error (0, errno
, "%s", file
);
274 /* Allow reading stdin from tty more than once. */
278 else if (fp
== stdout
)
280 if (fflush (fp
) != 0)
282 error (0, errno
, "flushing file");
289 if (fclose (fp
) != 0)
291 error (0, errno
, "error closing file");
299 xfwrite (buf
, size
, nelem
, fp
)
304 if (fwrite (buf
, size
, nelem
, fp
) != nelem
)
306 error (0, errno
, "write error");
312 /* Return a name for a temporary file. */
318 int len
= strlen (temp_file_prefix
);
319 char *name
= xmalloc (len
+ 16);
320 struct tempnode
*node
=
321 (struct tempnode
*) xmalloc (sizeof (struct tempnode
));
324 "%s%ssort%5.5d%5.5d",
326 (len
&& temp_file_prefix
[len
- 1] != '/') ? "/" : "",
327 (unsigned int) getpid () & 0xffff, ++seq
);
329 node
->next
= temphead
.next
;
330 temphead
.next
= node
;
334 /* Search through the list of temporary files for NAME;
335 remove it if it is found on the list. */
341 struct tempnode
*node
, *temp
;
343 for (node
= &temphead
; node
->next
; node
= node
->next
)
344 if (!strcmp (name
, node
->next
->name
))
351 node
->next
= temp
->next
;
352 free ((char *) temp
);
356 /* Initialize the character class tables. */
363 for (i
= 0; i
< UCHAR_LIM
; ++i
)
371 if (!ISALNUM (i
) && !ISBLANK (i
))
372 nondictionary
[i
] = 1;
374 fold_toupper
[i
] = toupper (i
);
380 /* Initialize BUF, allocating ALLOC bytes initially. */
388 buf
->buf
= xmalloc (buf
->alloc
);
389 buf
->used
= buf
->left
= 0;
392 /* Fill BUF reading from FP, moving buf->left bytes from the end
393 of buf->buf to the beginning first. If EOF is reached and the
394 file wasn't terminated by a newline, supply one. Return a count
395 of bytes buffered. */
404 memmove (buf
->buf
, buf
->buf
+ buf
->used
- buf
->left
, buf
->left
);
405 buf
->used
= buf
->left
;
407 while (!feof (fp
) && (buf
->used
== 0 || !memchr (buf
->buf
, '\n', buf
->used
)))
409 if (buf
->used
== buf
->alloc
)
412 buf
->buf
= xrealloc (buf
->buf
, buf
->alloc
);
414 cc
= fread (buf
->buf
+ buf
->used
, 1, buf
->alloc
- buf
->used
, fp
);
417 error (0, errno
, "read error");
424 if (feof (fp
) && buf
->used
&& buf
->buf
[buf
->used
- 1] != '\n')
426 if (buf
->used
== buf
->alloc
)
429 buf
->buf
= xrealloc (buf
->buf
, buf
->alloc
);
431 buf
->buf
[buf
->used
++] = '\n';
437 /* Initialize LINES, allocating space for ALLOC lines initially.
438 LIMIT is the maximum possible number of lines to allocate space
442 initlines (lines
, alloc
, limit
)
447 lines
->alloc
= alloc
;
448 lines
->lines
= (struct line
*) xmalloc (lines
->alloc
* sizeof (struct line
));
450 lines
->limit
= limit
;
453 /* Return a pointer to the first character of the field specified
459 struct keyfield
*key
;
461 register char *ptr
= line
->text
, *lim
= ptr
+ line
->length
;
462 register int sword
= key
->sword
, schar
= key
->schar
;
465 while (ptr
< lim
&& sword
--)
467 while (ptr
< lim
&& *ptr
!= tab
)
473 while (ptr
< lim
&& sword
--)
475 while (ptr
< lim
&& blanks
[UCHAR (*ptr
)])
477 while (ptr
< lim
&& !blanks
[UCHAR (*ptr
)])
481 if (key
->skipsblanks
)
482 while (ptr
< lim
&& blanks
[UCHAR (*ptr
)])
485 while (ptr
< lim
&& schar
--)
491 /* Return the limit of (a pointer to the first character after) the field
492 in LINE specified by KEY. */
497 struct keyfield
*key
;
499 register char *ptr
= line
->text
, *lim
= ptr
+ line
->length
;
500 register int eword
= key
->eword
, echar
= key
->echar
;
503 while (ptr
< lim
&& eword
--)
505 while (ptr
< lim
&& *ptr
!= tab
)
507 if (ptr
< lim
&& (eword
|| key
->skipeblanks
))
511 while (ptr
< lim
&& eword
--)
513 while (ptr
< lim
&& blanks
[UCHAR (*ptr
)])
515 while (ptr
< lim
&& !blanks
[UCHAR (*ptr
)])
519 if (key
->skipeblanks
)
520 while (ptr
< lim
&& blanks
[UCHAR (*ptr
)])
523 while (ptr
< lim
&& echar
--)
529 /* Find the lines in BUF, storing pointers and lengths in LINES.
530 Also replace newlines with NULs. */
533 findlines (buf
, lines
)
537 register char *beg
= buf
->buf
, *lim
= buf
->buf
+ buf
->used
, *ptr
;
538 struct keyfield
*key
= keyhead
.next
;
542 while (beg
< lim
&& (ptr
= memchr (beg
, '\n', lim
- beg
))
543 && lines
->used
< lines
->limit
)
545 /* There are various places in the code that rely on a NUL
546 being at the end of in-core lines; NULs inside the lines
547 will not cause trouble, though. */
550 if (lines
->used
== lines
->alloc
)
553 lines
->lines
= (struct line
*)
554 xrealloc ((char *) lines
->lines
,
555 lines
->alloc
* sizeof (struct line
));
558 lines
->lines
[lines
->used
].text
= beg
;
559 lines
->lines
[lines
->used
].length
= ptr
- beg
;
561 /* Precompute the position of the first key for efficiency. */
565 lines
->lines
[lines
->used
].keylim
=
566 limfield (&lines
->lines
[lines
->used
], key
);
568 lines
->lines
[lines
->used
].keylim
= ptr
;
571 lines
->lines
[lines
->used
].keybeg
=
572 begfield (&lines
->lines
[lines
->used
], key
);
575 if (key
->skipsblanks
)
576 while (blanks
[UCHAR (*beg
)])
578 lines
->lines
[lines
->used
].keybeg
= beg
;
583 lines
->lines
[lines
->used
].keybeg
= 0;
584 lines
->lines
[lines
->used
].keylim
= 0;
591 buf
->left
= lim
- beg
;
594 /* Compare strings A and B containing decimal fractions < 1. Each string
595 should begin with a decimal point followed immediately by the digits
596 of the fraction. Strings not of this form are considered to be zero. */
600 register char *a
, *b
;
602 register tmpa
= UCHAR (*a
), tmpb
= UCHAR (*b
);
604 if (tmpa
== '.' && tmpb
== '.')
607 tmpa
= UCHAR (*++a
), tmpb
= UCHAR (*++b
);
608 while (tmpa
== tmpb
&& digits
[tmpa
]);
609 if (digits
[tmpa
] && digits
[tmpb
])
629 else if (tmpa
== '.')
638 else if (tmpb
== '.')
650 /* Compare strings A and B as numbers without explicitly converting them to
651 machine numbers. Comparatively slow for short strings, but asymptotically
656 register char *a
, *b
;
658 register int tmpa
, tmpb
, loga
, logb
, tmp
;
660 tmpa
= UCHAR (*a
), tmpb
= UCHAR (*b
);
672 if (digits
[tmpa
] && digits
[tmpb
])
683 while (tmpa
== tmpb
&& digits
[tmpa
])
684 tmpa
= UCHAR (*++a
), tmpb
= UCHAR (*++b
);
686 if ((tmpa
== '.' && !digits
[tmpb
]) || (tmpb
== '.' && !digits
[tmpa
]))
687 return -fraccompare (a
, b
);
690 for (loga
= 1; digits
[UCHAR (*++a
)]; ++loga
)
696 for (logb
= 1; digits
[UCHAR (*++b
)]; ++logb
)
701 if ((tmp
= logb
- loga
) != 0)
709 else if (tmpb
== '-')
711 if (digits
[UCHAR (tmpa
)] && digits
[UCHAR (*++b
)])
722 while (tmpa
== tmpb
&& digits
[tmpa
])
723 tmpa
= UCHAR (*++a
), tmpb
= UCHAR (*++b
);
725 if ((tmpa
== '.' && !digits
[tmpb
]) || (tmpb
== '.' && !digits
[tmpa
]))
726 return fraccompare (a
, b
);
729 for (loga
= 1; digits
[UCHAR (*++a
)]; ++loga
)
735 for (logb
= 1; digits
[UCHAR (*++b
)]; ++logb
)
740 if ((tmp
= loga
- logb
) != 0)
750 /* Return an integer <= 12 associated with month name S with length LEN,
751 0 if the name in S is not recognized. */
759 register int i
, lo
= 0, hi
= 12;
761 while (len
> 0 && blanks
[UCHAR(*s
)])
767 for (i
= 0; i
< 3; ++i
)
768 month
[i
] = fold_toupper
[UCHAR (s
[i
])];
772 if (strcmp (month
, monthtab
[(lo
+ hi
) / 2].name
) < 0)
776 if (!strcmp (month
, monthtab
[lo
].name
))
777 return monthtab
[lo
].val
;
781 /* Compare two lines A and B trying every key in sequence until there
782 are no more keys or a difference is found. */
788 register char *texta
, *textb
, *lima
, *limb
, *translate
;
789 register int *ignore
;
790 struct keyfield
*key
;
791 int diff
= 0, iter
= 0, lena
, lenb
;
793 for (key
= keyhead
.next
; key
; key
= key
->next
, ++iter
)
795 ignore
= key
->ignore
;
796 translate
= key
->translate
;
798 /* Find the beginning and limit of each field. */
799 if (iter
|| a
->keybeg
== NULL
|| b
->keybeg
== NULL
)
802 lima
= limfield (a
, key
), limb
= limfield (b
, key
);
804 lima
= a
->text
+ a
->length
, limb
= b
->text
+ b
->length
;
807 texta
= begfield (a
, key
), textb
= begfield (b
, key
);
810 texta
= a
->text
, textb
= b
->text
;
811 if (key
->skipsblanks
)
813 while (texta
< lima
&& blanks
[UCHAR (*texta
)])
815 while (textb
< limb
&& blanks
[UCHAR (*textb
)])
822 /* For the first iteration only, the key positions have
823 been precomputed for us. */
824 texta
= a
->keybeg
, lima
= a
->keylim
;
825 textb
= b
->keybeg
, limb
= b
->keylim
;
828 /* Find the lengths. */
829 lena
= lima
- texta
, lenb
= limb
- textb
;
835 /* Actually compare the fields. */
840 char savea
= *lima
, saveb
= *limb
;
842 *lima
= *limb
= '\0';
843 diff
= numcompare (texta
, textb
);
844 *lima
= savea
, *limb
= saveb
;
847 diff
= numcompare (texta
, textb
);
850 return key
->reverse
? -diff
: diff
;
855 diff
= getmonth (texta
, lena
) - getmonth (textb
, lenb
);
857 return key
->reverse
? -diff
: diff
;
860 else if (ignore
&& translate
)
861 while (texta
< lima
&& textb
< limb
)
863 while (texta
< lima
&& ignore
[UCHAR (*texta
)])
865 while (textb
< limb
&& ignore
[UCHAR (*textb
)])
867 if (texta
< lima
&& textb
< limb
&&
868 translate
[UCHAR (*texta
++)] != translate
[UCHAR (*textb
++)])
870 diff
= translate
[UCHAR (*--texta
)] - translate
[UCHAR (*--textb
)];
873 else if (texta
== lima
&& textb
< limb
) diff
= -1;
874 else if (texta
< lima
&& textb
== limb
) diff
= 1;
877 while (texta
< lima
&& textb
< limb
)
879 while (texta
< lima
&& ignore
[UCHAR (*texta
)])
881 while (textb
< limb
&& ignore
[UCHAR (*textb
)])
883 if (texta
< lima
&& textb
< limb
&& *texta
++ != *textb
++)
885 diff
= *--texta
- *--textb
;
888 else if (texta
== lima
&& textb
< limb
) diff
= -1;
889 else if (texta
< lima
&& textb
== limb
) diff
= 1;
892 while (texta
< lima
&& textb
< limb
)
894 if (translate
[UCHAR (*texta
++)] != translate
[UCHAR (*textb
++)])
896 diff
= translate
[UCHAR (*--texta
)] - translate
[UCHAR (*--textb
)];
901 diff
= memcmp (texta
, textb
, min (lena
, lenb
));
904 return key
->reverse
? -diff
: diff
;
905 if ((diff
= lena
- lenb
) != 0)
906 return key
->reverse
? -diff
: diff
;
912 /* Compare two lines A and B, returning negative, zero, or positive
913 depending on whether A compares less than, equal to, or greater than B. */
917 register struct line
*a
, *b
;
919 int diff
, tmpa
, tmpb
, mini
;
921 /* First try to compare on the specified keys (if any).
922 The only two cases with no key at all are unadorned sort,
923 and unadorned sort -r. */
926 diff
= keycompare (a
, b
);
929 if (unique
|| stable
)
933 /* If the keys all compare equal (or no keys were specified)
934 fall through to the default byte-by-byte comparison. */
935 tmpa
= a
->length
, tmpb
= b
->length
;
936 mini
= min (tmpa
, tmpb
);
941 char *ap
= a
->text
, *bp
= b
->text
;
943 diff
= UCHAR (*ap
) - UCHAR (*bp
);
946 diff
= memcmp (ap
, bp
, mini
);
952 return reverse
? -diff
: diff
;
955 /* Check that the lines read from the given FP come in order. Return
956 1 if they do and 0 if there is a disorder. */
962 struct buffer buf
; /* Input buffer. */
963 struct lines lines
; /* Lines scanned from the buffer. */
964 struct line
*prev_line
; /* Pointer to previous line. */
965 struct line temp
; /* Copy of previous line. */
966 int cc
; /* Character count. */
967 int cmp
; /* Result of calling compare. */
968 int alloc
, i
, success
= 1;
970 initbuf (&buf
, mergealloc
);
971 initlines (&lines
, mergealloc
/ linelength
+ 1,
972 LINEALLOC
/ ((NMERGE
+ NMERGE
) * sizeof (struct line
)));
974 temp
.text
= xmalloc (alloc
);
976 cc
= fillbuf (&buf
, fp
);
977 findlines (&buf
, &lines
);
982 /* Compare each line in the buffer with its successor. */
983 for (i
= 0; i
< lines
.used
- 1; ++i
)
985 cmp
= compare (&lines
.lines
[i
], &lines
.lines
[i
+ 1]);
986 if ((unique
&& cmp
>= 0) || (cmp
> 0))
993 /* Save the last line of the buffer and refill the buffer. */
994 prev_line
= lines
.lines
+ lines
.used
- 1;
995 if (prev_line
->length
> alloc
)
997 while (prev_line
->length
+ 1 > alloc
)
999 temp
.text
= xrealloc (temp
.text
, alloc
);
1001 memcpy (temp
.text
, prev_line
->text
, prev_line
->length
+ 1);
1002 temp
.length
= prev_line
->length
;
1003 temp
.keybeg
= temp
.text
+ (prev_line
->keybeg
- prev_line
->text
);
1004 temp
.keylim
= temp
.text
+ (prev_line
->keylim
- prev_line
->text
);
1006 cc
= fillbuf (&buf
, fp
);
1009 findlines (&buf
, &lines
);
1010 /* Make sure the line saved from the old buffer contents is
1011 less than or equal to the first line of the new buffer. */
1012 cmp
= compare (&temp
, &lines
.lines
[0]);
1013 if ((unique
&& cmp
>= 0) || (cmp
> 0))
1025 free ((char *) lines
.lines
);
1030 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1031 Close FPS before returning. */
1034 mergefps (fps
, nfps
, ofp
)
1038 struct buffer buffer
[NMERGE
]; /* Input buffers for each file. */
1039 struct lines lines
[NMERGE
]; /* Line tables for each buffer. */
1040 struct line saved
; /* Saved line for unique check. */
1041 int savedflag
= 0; /* True if there is a saved line. */
1042 int savealloc
; /* Size allocated for the saved line. */
1043 int cur
[NMERGE
]; /* Current line in each line table. */
1044 int ord
[NMERGE
]; /* Table representing a permutation of fps,
1045 such that lines[ord[0]].lines[cur[ord[0]]]
1046 is the smallest line and will be next
1048 register int i
, j
, t
;
1050 /* Allocate space for a saved line if necessary. */
1053 savealloc
= linelength
;
1054 saved
.text
= xmalloc (savealloc
);
1057 /* Read initial lines from each input file. */
1058 for (i
= 0; i
< nfps
; ++i
)
1060 initbuf (&buffer
[i
], mergealloc
);
1061 /* If a file is empty, eliminate it from future consideration. */
1062 while (i
< nfps
&& !fillbuf (&buffer
[i
], fps
[i
]))
1066 for (j
= i
; j
< nfps
; ++j
)
1067 fps
[j
] = fps
[j
+ 1];
1070 free (buffer
[i
].buf
);
1073 initlines (&lines
[i
], mergealloc
/ linelength
+ 1,
1074 LINEALLOC
/ ((NMERGE
+ NMERGE
) * sizeof (struct line
)));
1075 findlines (&buffer
[i
], &lines
[i
]);
1080 /* Set up the ord table according to comparisons among input lines.
1081 Since this only reorders two items if one is strictly greater than
1082 the other, it is stable. */
1083 for (i
= 0; i
< nfps
; ++i
)
1085 for (i
= 1; i
< nfps
; ++i
)
1086 if (compare (&lines
[ord
[i
- 1]].lines
[cur
[ord
[i
- 1]]],
1087 &lines
[ord
[i
]].lines
[cur
[ord
[i
]]]) > 0)
1088 t
= ord
[i
- 1], ord
[i
- 1] = ord
[i
], ord
[i
] = t
, i
= 0;
1090 /* Repeatedly output the smallest line until no input remains. */
1093 /* If uniqified output is turned on, output only the first of
1094 an identical series of lines. */
1097 if (savedflag
&& compare (&saved
, &lines
[ord
[0]].lines
[cur
[ord
[0]]]))
1099 xfwrite (saved
.text
, 1, saved
.length
, ofp
);
1105 if (savealloc
< lines
[ord
[0]].lines
[cur
[ord
[0]]].length
+ 1)
1107 while (savealloc
< lines
[ord
[0]].lines
[cur
[ord
[0]]].length
+ 1)
1109 saved
.text
= xrealloc (saved
.text
, savealloc
);
1111 saved
.length
= lines
[ord
[0]].lines
[cur
[ord
[0]]].length
;
1112 memcpy (saved
.text
, lines
[ord
[0]].lines
[cur
[ord
[0]]].text
,
1114 if (lines
[ord
[0]].lines
[cur
[ord
[0]]].keybeg
!= NULL
)
1116 saved
.keybeg
= saved
.text
+
1117 (lines
[ord
[0]].lines
[cur
[ord
[0]]].keybeg
1118 - lines
[ord
[0]].lines
[cur
[ord
[0]]].text
);
1120 if (lines
[ord
[0]].lines
[cur
[ord
[0]]].keylim
!= NULL
)
1122 saved
.keylim
= saved
.text
+
1123 (lines
[ord
[0]].lines
[cur
[ord
[0]]].keylim
1124 - lines
[ord
[0]].lines
[cur
[ord
[0]]].text
);
1131 xfwrite (lines
[ord
[0]].lines
[cur
[ord
[0]]].text
, 1,
1132 lines
[ord
[0]].lines
[cur
[ord
[0]]].length
, ofp
);
1136 /* Check if we need to read more lines into core. */
1137 if (++cur
[ord
[0]] == lines
[ord
[0]].used
)
1138 if (fillbuf (&buffer
[ord
[0]], fps
[ord
[0]]))
1140 findlines (&buffer
[ord
[0]], &lines
[ord
[0]]);
1145 /* We reached EOF on fps[ord[0]]. */
1146 for (i
= 1; i
< nfps
; ++i
)
1147 if (ord
[i
] > ord
[0])
1150 xfclose (fps
[ord
[0]]);
1151 free (buffer
[ord
[0]].buf
);
1152 free ((char *) lines
[ord
[0]].lines
);
1153 for (i
= ord
[0]; i
< nfps
; ++i
)
1155 fps
[i
] = fps
[i
+ 1];
1156 buffer
[i
] = buffer
[i
+ 1];
1157 lines
[i
] = lines
[i
+ 1];
1158 cur
[i
] = cur
[i
+ 1];
1160 for (i
= 0; i
< nfps
; ++i
)
1161 ord
[i
] = ord
[i
+ 1];
1165 /* The new line just read in may be larger than other lines
1166 already in core; push it back in the queue until we encounter
1167 a line larger than it. */
1168 for (i
= 1; i
< nfps
; ++i
)
1170 t
= compare (&lines
[ord
[0]].lines
[cur
[ord
[0]]],
1171 &lines
[ord
[i
]].lines
[cur
[ord
[i
]]]);
1173 t
= ord
[0] - ord
[i
];
1178 for (j
= 1; j
< i
; ++j
)
1179 ord
[j
- 1] = ord
[j
];
1183 if (unique
&& savedflag
)
1185 xfwrite (saved
.text
, 1, saved
.length
, ofp
);
1191 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1194 sortlines (lines
, nlines
, temp
)
1195 struct line
*lines
, *temp
;
1198 register struct line
*lo
, *hi
, *t
;
1199 register int nlo
, nhi
;
1203 if (compare (&lines
[0], &lines
[1]) > 0)
1204 *temp
= lines
[0], lines
[0] = lines
[1], lines
[1] = *temp
;
1214 sortlines (lo
, nlo
, temp
);
1217 sortlines (hi
, nhi
, temp
);
1222 if (compare (lo
, hi
) <= 0)
1223 *t
++ = *lo
++, --nlo
;
1225 *t
++ = *hi
++, --nhi
;
1229 for (lo
= lines
, nlo
= nlines
- nhi
, t
= temp
; nlo
; --nlo
)
1233 /* Check that each of the NFILES FILES is ordered.
1234 Return a count of disordered files. */
1237 check (files
, nfiles
)
1241 int i
, disorders
= 0;
1244 for (i
= 0; i
< nfiles
; ++i
)
1246 fp
= xfopen (files
[i
], "r");
1249 printf ("%s: disorder on %s\n", program_name
, files
[i
]);
1256 /* Merge NFILES FILES onto OFP. */
1259 merge (files
, nfiles
, ofp
)
1266 FILE *fps
[NMERGE
], *tfp
;
1268 while (nfiles
> NMERGE
)
1271 for (i
= 0; i
< nfiles
/ NMERGE
; ++i
)
1273 for (j
= 0; j
< NMERGE
; ++j
)
1274 fps
[j
] = xfopen (files
[i
* NMERGE
+ j
], "r");
1275 tfp
= xfopen (temp
= tempname (), "w");
1276 mergefps (fps
, NMERGE
, tfp
);
1278 for (j
= 0; j
< NMERGE
; ++j
)
1279 zaptemp (files
[i
* NMERGE
+ j
]);
1282 for (j
= 0; j
< nfiles
% NMERGE
; ++j
)
1283 fps
[j
] = xfopen (files
[i
* NMERGE
+ j
], "r");
1284 tfp
= xfopen (temp
= tempname (), "w");
1285 mergefps (fps
, nfiles
% NMERGE
, tfp
);
1287 for (j
= 0; j
< nfiles
% NMERGE
; ++j
)
1288 zaptemp (files
[i
* NMERGE
+ j
]);
1293 for (i
= 0; i
< nfiles
; ++i
)
1294 fps
[i
] = xfopen (files
[i
], "r");
1295 mergefps (fps
, i
, ofp
);
1296 for (i
= 0; i
< nfiles
; ++i
)
1300 /* Sort NFILES FILES onto OFP. */
1303 sort (files
, nfiles
, ofp
)
1313 struct tempnode
*node
;
1317 initbuf (&buf
, sortalloc
);
1318 initlines (&lines
, sortalloc
/ linelength
+ 1,
1319 LINEALLOC
/ sizeof (struct line
));
1321 tmp
= (struct line
*) xmalloc (ntmp
* sizeof (struct line
));
1325 fp
= xfopen (*files
++, "r");
1326 while (fillbuf (&buf
, fp
))
1328 findlines (&buf
, &lines
);
1329 if (lines
.used
> ntmp
)
1331 while (lines
.used
> ntmp
)
1333 tmp
= (struct line
*)
1334 xrealloc ((char *) tmp
, ntmp
* sizeof (struct line
));
1336 sortlines (lines
.lines
, lines
.used
, tmp
);
1337 if (feof (fp
) && !nfiles
&& !ntemp
&& !buf
.left
)
1342 tfp
= xfopen (tempname (), "w");
1344 for (i
= 0; i
< lines
.used
; ++i
)
1345 if (!unique
|| i
== 0
1346 || compare (&lines
.lines
[i
], &lines
.lines
[i
- 1]))
1348 xfwrite (lines
.lines
[i
].text
, 1, lines
.lines
[i
].length
, tfp
);
1358 free ((char *) lines
.lines
);
1359 free ((char *) tmp
);
1363 tempfiles
= (char **) xmalloc (ntemp
* sizeof (char *));
1365 for (node
= temphead
.next
; i
> 0; node
= node
->next
)
1366 tempfiles
[--i
] = node
->name
;
1367 merge (tempfiles
, ntemp
, ofp
);
1368 free ((char *) tempfiles
);
1372 /* Insert key KEY at the end of the list (`keyhead'). */
1376 struct keyfield
*key
;
1378 struct keyfield
*k
= &keyhead
;
1390 error (2, 0, "invalid field specification `%s'", s
);
1393 /* Handle interrupts and hangups. */
1399 #ifdef _POSIX_VERSION
1400 struct sigaction sigact
;
1402 sigact
.sa_handler
= SIG_DFL
;
1403 sigemptyset (&sigact
.sa_mask
);
1404 sigact
.sa_flags
= 0;
1405 sigaction (sig
, &sigact
, NULL
);
1406 #else /* !_POSIX_VERSION */
1407 signal (sig
, SIG_DFL
);
1408 #endif /* _POSIX_VERSION */
1410 kill (getpid (), sig
);
1413 /* Set the ordering options for KEY specified in S.
1414 Return the address of the first character in S that
1415 is not a valid ordering option.
1416 BLANKTYPE is the kind of blanks that 'b' should skip. */
1419 set_ordering (s
, key
, blanktype
)
1421 struct keyfield
*key
;
1422 enum blanktype blanktype
;
1429 if (blanktype
== bl_start
|| blanktype
== bl_both
)
1430 key
->skipsblanks
= 1;
1431 if (blanktype
== bl_end
|| blanktype
== bl_both
)
1432 key
->skipeblanks
= 1;
1435 key
->ignore
= nondictionary
;
1438 key
->translate
= fold_toupper
;
1442 /* Reserved for comparing floating-point numbers. */
1446 key
->ignore
= nonprinting
;
1453 if (blanktype
== bl_start
|| blanktype
== bl_both
)
1454 key
->skipsblanks
= 1;
1455 if (blanktype
== bl_end
|| blanktype
== bl_both
)
1456 key
->skipeblanks
= 1;
1474 struct keyfield
*key
= NULL
, gkey
;
1477 int checkonly
= 0, mergeonly
= 0, nfiles
= 0;
1478 char *minus
= "-", *outfile
= minus
, **files
, *tmp
;
1480 #ifdef _POSIX_VERSION
1481 struct sigaction oldact
, newact
;
1482 #endif /* _POSIX_VERSION */
1484 program_name
= argv
[0];
1486 parse_long_options (argc
, argv
, "sort", version_string
, usage
);
1488 have_read_stdin
= 0;
1491 temp_file_prefix
= getenv ("TMPDIR");
1492 if (temp_file_prefix
== NULL
)
1493 temp_file_prefix
= DEFAULT_TMPDIR
;
1495 #ifdef _POSIX_VERSION
1496 newact
.sa_handler
= sighandler
;
1497 sigemptyset (&newact
.sa_mask
);
1498 newact
.sa_flags
= 0;
1500 sigaction (SIGINT
, NULL
, &oldact
);
1501 if (oldact
.sa_handler
!= SIG_IGN
)
1502 sigaction (SIGINT
, &newact
, NULL
);
1503 sigaction (SIGHUP
, NULL
, &oldact
);
1504 if (oldact
.sa_handler
!= SIG_IGN
)
1505 sigaction (SIGHUP
, &newact
, NULL
);
1506 sigaction (SIGPIPE
, NULL
, &oldact
);
1507 if (oldact
.sa_handler
!= SIG_IGN
)
1508 sigaction (SIGPIPE
, &newact
, NULL
);
1509 sigaction (SIGTERM
, NULL
, &oldact
);
1510 if (oldact
.sa_handler
!= SIG_IGN
)
1511 sigaction (SIGTERM
, &newact
, NULL
);
1512 #else /* !_POSIX_VERSION */
1513 if (signal (SIGINT
, SIG_IGN
) != SIG_IGN
)
1514 signal (SIGINT
, sighandler
);
1515 if (signal (SIGHUP
, SIG_IGN
) != SIG_IGN
)
1516 signal (SIGHUP
, sighandler
);
1517 if (signal (SIGPIPE
, SIG_IGN
) != SIG_IGN
)
1518 signal (SIGPIPE
, sighandler
);
1519 if (signal (SIGTERM
, SIG_IGN
) != SIG_IGN
)
1520 signal (SIGTERM
, sighandler
);
1521 #endif /* !_POSIX_VERSION */
1523 gkey
.sword
= gkey
.eword
= -1;
1525 gkey
.translate
= NULL
;
1526 gkey
.numeric
= gkey
.month
= gkey
.reverse
= 0;
1527 gkey
.skipsblanks
= gkey
.skipeblanks
= 0;
1529 files
= (char **) xmalloc (sizeof (char *) * argc
);
1531 for (i
= 1; i
< argc
; ++i
)
1533 if (argv
[i
][0] == '+')
1537 key
= (struct keyfield
*) xmalloc (sizeof (struct keyfield
));
1540 key
->translate
= NULL
;
1541 key
->skipsblanks
= key
->skipeblanks
= 0;
1542 key
->numeric
= key
->month
= key
->reverse
= 0;
1544 if (!digits
[UCHAR (*s
)])
1545 badfieldspec (argv
[i
]);
1546 for (t
= 0; digits
[UCHAR (*s
)]; ++s
)
1547 t
= 10 * t
+ *s
- '0';
1550 for (++s
; digits
[UCHAR (*s
)]; ++s
)
1551 t2
= 10 * t2
+ *s
- '0';
1559 s
= set_ordering (s
, key
, bl_start
);
1561 badfieldspec (argv
[i
]);
1563 else if (argv
[i
][0] == '-' && argv
[i
][1])
1566 if (digits
[UCHAR (*s
)])
1570 for (t
= 0; digits
[UCHAR (*s
)]; ++s
)
1571 t
= t
* 10 + *s
- '0';
1574 for (++s
; digits
[UCHAR (*s
)]; ++s
)
1575 t2
= t2
* 10 + *s
- '0';
1578 s
= set_ordering (s
, key
, bl_end
);
1580 badfieldspec (argv
[i
]);
1587 s
= set_ordering (s
, &gkey
, bl_both
);
1601 error (2, 0, "option `-k' requires an argument");
1607 key
= (struct keyfield
*)
1608 xmalloc (sizeof (struct keyfield
));
1611 key
->translate
= NULL
;
1612 key
->skipsblanks
= key
->skipeblanks
= 0;
1613 key
->numeric
= key
->month
= key
->reverse
= 0;
1615 if (!digits
[UCHAR (*s
)])
1616 badfieldspec (argv
[i
]);
1617 for (t
= 0; digits
[UCHAR (*s
)]; ++s
)
1618 t
= 10 * t
+ *s
- '0';
1624 for (++s
; digits
[UCHAR (*s
)]; ++s
)
1625 t2
= 10 * t2
+ *s
- '0';
1636 s
= set_ordering (s
, key
, bl_start
);
1637 if (*s
&& *s
!= ',')
1638 badfieldspec (argv
[i
]);
1642 for (t
= 0; digits
[UCHAR (*s
)]; ++s
)
1643 t
= t
* 10 + *s
- '0';
1649 for (++s
; digits
[UCHAR (*s
)]; ++s
)
1650 t2
= t2
* 10 + *s
- '0';
1656 s
= set_ordering (s
, key
, bl_end
);
1658 badfieldspec (argv
[i
]);
1672 error (2, 0, "option `-o' requires an argument");
1674 outfile
= argv
[++i
];
1683 else if (i
< argc
- 1)
1689 error (2, 0, "option `-t' requires an argument");
1693 temp_file_prefix
= ++s
;
1697 temp_file_prefix
= argv
[++i
];
1699 error (2, 0, "option `-T' requires an argument");
1707 /* Accept and ignore e.g. -y0 for compatibility with
1711 fprintf (stderr
, "%s: unrecognized option `-%c'\n",
1719 else /* Not an option. */
1721 files
[nfiles
++] = argv
[i
];
1729 /* Inheritance of global options to individual keys. */
1730 for (key
= keyhead
.next
; key
; key
= key
->next
)
1731 if (!key
->ignore
&& !key
->translate
&& !key
->skipsblanks
&& !key
->reverse
1732 && !key
->skipeblanks
&& !key
->month
&& !key
->numeric
)
1734 key
->ignore
= gkey
.ignore
;
1735 key
->translate
= gkey
.translate
;
1736 key
->skipsblanks
= gkey
.skipsblanks
;
1737 key
->skipeblanks
= gkey
.skipeblanks
;
1738 key
->month
= gkey
.month
;
1739 key
->numeric
= gkey
.numeric
;
1740 key
->reverse
= gkey
.reverse
;
1743 if (!keyhead
.next
&& (gkey
.ignore
|| gkey
.translate
|| gkey
.skipsblanks
1744 || gkey
.skipeblanks
|| gkey
.month
|| gkey
.numeric
))
1746 reverse
= gkey
.reverse
;
1755 exit (check (files
, nfiles
) != 0);
1757 if (strcmp (outfile
, "-"))
1759 struct stat outstat
;
1760 if (stat (outfile
, &outstat
) == 0)
1762 /* The following code prevents a race condition when
1763 people use the brain dead shell programming idiom:
1764 cat file | sort -o file
1765 This feature is provided for historical compatibility,
1766 but we strongly discourage ever relying on this in
1767 new shell programs. */
1769 /* Temporarily copy each input file that might be another name
1770 for the output file. When in doubt (e.g. a pipe), copy. */
1771 for (i
= 0; i
< nfiles
; ++i
)
1777 if (S_ISREG (outstat
.st_mode
) && strcmp (outfile
, files
[i
]))
1780 if ((strcmp (files
[i
], "-")
1781 ? stat (files
[i
], &instat
)
1782 : fstat (fileno (stdin
), &instat
)) != 0)
1784 error (0, errno
, "%s", files
[i
]);
1788 if (S_ISREG (instat
.st_mode
)
1789 && (instat
.st_ino
!= outstat
.st_ino
1790 || instat
.st_dev
!= outstat
.st_dev
))
1792 /* We know the files are distinct. */
1797 fp
= xfopen (files
[i
], "r");
1799 ofp
= xfopen (tmp
, "w");
1800 while ((cc
= fread (buf
, 1, sizeof buf
, fp
)) > 0)
1801 xfwrite (buf
, 1, cc
, ofp
);
1804 error (0, errno
, "%s", files
[i
]);
1813 ofp
= xfopen (outfile
, "w");
1819 merge (files
, nfiles
, ofp
);
1821 sort (files
, nfiles
, ofp
);
1824 /* If we wait for the implicit flush on exit, and the parent process
1825 has closed stdout (e.g., exec >&- in a shell), then the output file
1826 winds up empty. I don't understand why. This is under SunOS,
1827 Solaris, Ultrix, and Irix. This premature fflush makes the output
1828 reappear. --karl@cs.umb.edu */
1829 if (fflush (ofp
) < 0)
1830 error (1, errno
, "%s: write error", outfile
);
1832 if (have_read_stdin
&& fclose (stdin
) == EOF
)
1833 error (1, errno
, outfile
);
1834 if (ferror (stdout
) || fclose (stdout
) == EOF
)
1835 error (1, errno
, "%s: write error", outfile
);
1845 fprintf (stderr
, "Try `%s --help' for more information.\n",
1850 Usage: %s [OPTION]... [FILE]...\n\
1854 Write sorted concatenation of all FILE(s) to standard output.\n\
1856 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
1857 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
1858 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
1859 -b ignore leading blanks in sort fields or keys\n\
1860 -c check if given files already sorted, do not sort\n\
1861 -d consider only [a-zA-Z0-9 ] characters in keys\n\
1862 -f fold lower case to upper case characters in keys\n\
1863 -i consider only [\\040-\\0176] characters in keys\n\
1864 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
1865 -m merge already sorted files, do not sort\n\
1866 -n compare according to string numerical value, imply -b\n\
1867 -o FILE write result on FILE instead of standard output\n\
1868 -r reverse the result of comparisons\n\
1869 -s stabilize sort by disabling last resort comparison\n\
1870 -t SEP use SEParator instead of non- to whitespace transition\n\
1871 -u with -c, check for strict ordering\n\
1872 -u with -m, only output the first of an equal sequence\n\
1873 --help display this help and exit\n\
1874 --version output version information and exit\n\
1876 POS is F[.C][OPTS], where F is the field number and C the character\n\
1877 position in the field, both counted from zero. OPTS is made up of one\n\
1878 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
1879 for that key. If no key given, use the entire line as key. With no\n\
1880 FILE, or when FILE is -, read standard input.\n\