4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
29 #include "initialize.h"
30 #include "statistics.h"
31 #include "streams_common.h"
/*
 * utility.c contains the general purpose routines used in various locations
 * throughout sort.  It provides a number of interfaces that maintain local
 * state relevant to this instance of sort.  We discuss the more significant
 * of these interfaces below.
 *
 * Output guard
 *   sort is one of the few Unix utilities that is capable of working "in
 *   place"; that is, sort can manipulate an input file and place its output in
 *   a file of the same name safely.  This is handled in this implementation by
 *   the output guard facility.  In the case of an interrupt or other fatal
 *   signal, sort attempts to restore the original input file.
 *
 * Temporary file cleanup
 *   Similar to the output guard facility, sort cleans up its temporary files in
 *   the case of interruption (or normal exit, for that matter); this is handled
 *   by registering a list of file pointers for later use by the atexit handler.
 *
 * Temporary filename security
 *   sort protects against "open-through-link" security attacks by verifying
 *   that the selected temporary file name is unused.  If the file name is in
 *   use, the pattern is readjusted until an available name pattern is
 *   found.
 *
 * Buffered I/O
 *   sort has a simple buffered I/O facility of its own, to facilitate writing
 *   data in large quantities (particularly for multibyte locales).  cxwrite()
 *   is the base routine, while wxwrite(), which handles multibyte buffers, is
 *   built on top of cxwrite().
 */
68 #define XBUFFER_SIZE (32 * KILOBYTE)
71 #define EXIT_FAILURE 1
73 #define EXIT_INTERNAL 3
75 static int held_fd
= -1;
77 static stream_t
**cleanup_chain
= NULL
;
79 static char *output_guard_tempname
= NULL
;
80 static ssize_t output_guard_size
= 0;
81 static char *output_guard_filename
= NULL
;
82 static int output_guard_copy_complete
= 0;
84 static const char *default_tmpdir
= "/var/tmp";
85 static const char *default_template
= "/stmAAAXXXXXX";
86 static const char *default_template_count
= ".00000000";
87 static char *current_tmpdir
;
88 static char *current_template
;
90 static const char PNAME_FMT
[] = "%s: ";
91 static const char ERRNO_FMT
[] = ": %s\n";
92 static const char *pname
= "sort";
95 swap(void **a
, void **b
)
103 __S(stats_incr_swaps());
107 * Temporary file name template handling.
110 reset_file_template()
115 (void) strcpy(current_template
, current_tmpdir
);
116 (void) strcat(current_template
, default_template
);
117 (void) mktemp(current_template
);
118 (void) strcat(current_template
, default_template_count
);
119 } while (lstat(current_template
, &s
) != -1);
126 int n
= strlen(current_template
);
129 for (i
= n
- 1; isdigit((uchar_t
)current_template
[i
]); i
--) {
130 current_template
[i
]++;
131 if (current_template
[i
] > '9')
132 current_template
[i
] = '0';
137 if (!isdigit((uchar_t
)current_template
[i
])) {
139 * Template has been exhausted, so reset.
141 reset_file_template();
144 if (lstat(current_template
, &s
) == 0) {
146 * Our newly bumped template has been anticipated; reset to
147 * avoid possible "link-through" attack.
149 reset_file_template();
156 set_file_template(char **T
)
159 int check_tmpdir
= 0;
162 current_tmpdir
= strdup(*T
);
164 } else if ((current_tmpdir
= getenv("TMPDIR")) != NULL
) {
167 current_tmpdir
= (char *)default_tmpdir
;
171 * Check that the temporary directory given exists, and is a directory.
174 if (stat(current_tmpdir
, &s
) != 0) {
175 warn(gettext("cannot stat temporary directory %s"),
178 current_tmpdir
= (char *)default_tmpdir
;
179 } else if (!S_ISDIR(s
.st_mode
)) {
180 warn(gettext("%s is not a directory; "
181 "using default temporary directory"),
184 current_tmpdir
= (char *)default_tmpdir
;
188 ASSERT(current_tmpdir
!= NULL
);
190 current_template
= safe_realloc(NULL
, strlen(current_tmpdir
)
191 + strlen(default_template
) + strlen(default_template_count
) + 1);
193 reset_file_template();
199 return (current_template
);
203 * Output guard routines.
206 establish_output_guard(sort_t
*S
)
208 struct stat output_stat
;
210 if (S
->m_output_to_stdout
)
213 if (stat(S
->m_output_filename
, &output_stat
) == 0) {
214 stream_t
*strp
= S
->m_input_streams
;
216 while (strp
!= NULL
) {
218 * We needn't protect an empty file.
220 if (!(strp
->s_status
& STREAM_NOTFILE
) &&
221 strp
->s_dev
== output_stat
.st_dev
&&
222 strp
->s_ino
== output_stat
.st_ino
&&
223 strp
->s_filesize
> 0) {
224 output_guard_filename
= S
->m_output_filename
;
225 output_guard_size
= strp
->s_filesize
;
227 ASSERT(output_guard_filename
!= NULL
);
229 if (bump_file_template() < 0)
232 if ((strp
->s_filename
= output_guard_tempname
=
233 strdup(get_file_template())) == NULL
)
236 xcp(output_guard_tempname
,
237 output_guard_filename
, output_guard_size
);
239 output_guard_copy_complete
= 1;
249 remove_output_guard()
251 if (output_guard_tempname
&& unlink(output_guard_tempname
) == -1)
252 warn(gettext("unable to unlink %s"), output_guard_tempname
);
254 output_guard_tempname
= NULL
;
258 set_cleanup_chain(stream_t
**strp
)
260 ASSERT(strp
!= NULL
);
262 cleanup_chain
= strp
;
266 * atexit_handler() cleans up any temporary files outstanding after a fatal
267 * signal, a call to die() or at exit(). To preserve the input file under low
268 * storage conditions (and both the output file and the temporary files are
269 * directed at the same filesystem), we remove all temporary files but the
270 * output guard first, and then restore the original file. Of course, this is
271 * not foolproof, as another writer may have exhausted storage.
278 if (cleanup_chain
&& *cleanup_chain
)
279 for (strp
= *cleanup_chain
; strp
!= NULL
; strp
= strp
->s_next
)
280 stream_unlink_temporary(strp
);
282 if (output_guard_tempname
) {
283 if (output_guard_copy_complete
)
284 xcp(output_guard_filename
, output_guard_tempname
,
287 remove_output_guard();
290 __S(stats_display());
296 const char *format_str
= "%lf%c";
300 size_t phys_total
= sysconf(_SC_PHYS_PAGES
) * sysconf(_SC_PAGESIZE
);
302 if (sscanf(S
, format_str
, &val
, &units
) < 1 || val
< 0)
306 if (val
< 0 || val
> 100)
308 val
*= phys_total
/ 100;
311 case 't' : /* terabytes */
315 case 'g' : /* gigabytes */
319 case 'm' : /* megabytes */
323 case 'k' : /* kilobytes */
327 case 'b' : /* bytes */
332 * default is kilobytes
341 retval
= (size_t)val
;
347 available_memory(size_t mem_limit
)
349 size_t phys_avail
= sysconf(_SC_AVPHYS_PAGES
) * sysconf(_SC_PAGESIZE
);
352 if (mem_limit
!= 0) {
355 * In the debug case, we want to test the temporary files
356 * handling, so no lower bound on the memory limit is imposed.
360 avail
= MAX(64 * KILOBYTE
, mem_limit
);
363 avail
= MAX(64 * KILOBYTE
, MIN(AV_MEM_MULTIPLIER
* phys_avail
/
364 AV_MEM_DIVISOR
, 16 * MEGABYTE
));
367 __S(stats_set_available_memory(avail
));
373 set_memory_ratio(sort_t
*S
, int *numerator
, int *denominator
)
376 *numerator
= CHAR_AVG_LINE
;
377 *denominator
= sizeof (line_rec_t
) + sizeof (line_rec_t
*) +
378 CHAR_AVG_LINE
+ CHAR_AVG_LINE
;
382 if (S
->m_single_byte_locale
) {
383 *numerator
= CHAR_AVG_LINE
;
384 *denominator
= sizeof (line_rec_t
) + sizeof (line_rec_t
*) +
385 CHAR_AVG_LINE
+ XFRM_MULTIPLIER
* CHAR_AVG_LINE
;
389 *numerator
= WCHAR_AVG_LINE
;
390 *denominator
= sizeof (line_rec_t
) + sizeof (line_rec_t
*) +
391 WCHAR_AVG_LINE
+ WCHAR_AVG_LINE
;
395 safe_realloc(void *ptr
, size_t sz
)
398 * safe_realloc() is not meant as an alternative free() mechanism--we
399 * disallow reallocations to size zero.
403 if ((ptr
= realloc(ptr
, sz
)) != NULL
)
406 die(gettext("unable to reallocate buffer"));
408 return (NULL
); /* keep gcc happy */
418 xzmap(void *addr
, size_t len
, int prot
, int flags
, off_t off
)
422 pa
= mmap(addr
, len
, prot
, flags
| MAP_ANON
, -1, off
);
423 if (pa
== MAP_FAILED
)
424 die(gettext("can't mmap anonymous memory"));
432 (void) fprintf(stderr
,
433 gettext("usage: %s [-cmu] [-o output] [-T directory] [-S mem]"
434 " [-z recsz]\n\t[-dfiMnr] [-b] [-t char] [-k keydef]"
435 " [+pos1 [-pos2]] files...\n"), CMDNAME
);
440 * hold_file_descriptor() and release_file_descriptor() reserve a single file
441 * descriptor entry for later use. We issue the hold prior to any loop that has
442 * an exit condition based on the receipt of EMFILE from an open() call; once we
443 * have exited, we can release, typically prior to opening a file for output.
446 hold_file_descriptor()
448 ASSERT(held_fd
== -1);
450 if ((held_fd
= open("/dev/null", O_RDONLY
)) == -1)
451 die(gettext("insufficient available file descriptors\n"));
455 release_file_descriptor()
457 ASSERT(held_fd
!= -1);
459 (void) close(held_fd
);
464 copy_line_rec(const line_rec_t
*a
, line_rec_t
*b
)
466 (void) memcpy(b
, a
, sizeof (line_rec_t
));
475 (void) ungetc(fgetc(f
), f
);
479 * int cxwrite(int, char *, size_t)
482 * cxwrite() implements a buffered version of fwrite(ptr, nbytes, 1, .) on
483 * file descriptors. It returns -1 in the case that the write() fails to
484 * write the current buffer contents. cxwrite() must be flushed before being
485 * applied to a new file descriptor.
488 * 0 on success, -1 on error.
491 cxwrite(int fd
, char *ptr
, size_t nbytes
)
493 static char buffer
[XBUFFER_SIZE
];
494 static size_t offset
= 0;
499 while (offset
-= write(fd
, buffer
, offset
)) {
510 while (nbytes
!= 0) {
511 if (offset
+ nbytes
> XBUFFER_SIZE
)
512 mbytes
= XBUFFER_SIZE
- offset
;
516 (void) memcpy(buffer
+ offset
, ptr
, mbytes
);
523 while (offset
-= write(fd
, buffer
, offset
)) {
/*
 * int wxwrite(int, wchar_t *)
 *
 * Overview
 *   wxwrite() implements a buffered write() function for null-terminated wide
 *   character buffers with similar calling semantics to cxwrite().  The wide
 *   string is converted to its multibyte form in a static conversion buffer,
 *   which grows as needed, and is then handed to cxwrite().  Calling
 *   wxwrite() with a NULL ptr flushes the data buffered for fd.  wxwrite()
 *   must be flushed before being applied to a new file descriptor.
 *
 * Return values
 *   0 on success, -1 if the string cannot be converted or the current buffer
 *   contents cannot be written.
 */
int
wxwrite(int fd, wchar_t *ptr)
{
	static char *convert_buffer;
	static size_t convert_bufsize = 1024;
	size_t req_bufsize;

	/* A flush request must reach the caller's descriptor. */
	if (ptr == NULL)
		return (cxwrite(fd, NULL, 1));

	if (convert_buffer == NULL)
		convert_buffer = safe_realloc(NULL, convert_bufsize);

	/*
	 * We use wcstombs(NULL, ., .) to obtain the number of bytes that the
	 * multibyte form requires, and grow the conversion buffer if it is
	 * too small.  A wide character with no multibyte representation is
	 * reported as (size_t)-1, which must not be mistaken for a size.
	 */
	req_bufsize = wcstombs(NULL, ptr, convert_bufsize);
	if (req_bufsize == (size_t)-1)
		return (-1);

	if (req_bufsize > convert_bufsize) {
		convert_bufsize = req_bufsize + 1;
		convert_buffer = safe_realloc(convert_buffer, convert_bufsize);
	}

	(void) wcstombs(convert_buffer, ptr, convert_bufsize);

	return (cxwrite(fd, convert_buffer, req_bufsize));
}
/*
 * xstreql() returns 1 if the strings a and b are identical, and 0 otherwise.
 */
int
xstreql(const char *a, const char *b)
{
	return (!strcmp(a, b));
}
/*
 * xstrneql() returns 1 if the strings a and b agree in their first l
 * characters (as compared by strncmp()), and 0 otherwise.
 */
int
xstrneql(const char *a, const char *b, const size_t l)
{
	return (!strncmp(a, b, l));
}
590 xstrnchr(const char *S
, const int c
, const size_t n
)
592 const char *eS
= S
+ n
;
/*
 * xstrninv() replaces each of the length characters of s beginning at index
 * start with UCHAR_MAX minus its value.  Characters outside that range are
 * left untouched.
 */
void
xstrninv(char *s, ssize_t start, ssize_t length)
{
	const ssize_t end = start + length;
	ssize_t pos = start;

	while (pos < end) {
		s[pos] = UCHAR_MAX - s[pos];
		pos++;
	}
}
/*
 * xwcsneql() returns 1 if the wide strings a and b agree in their first
 * length wide characters (as compared by wcsncmp()), and 0 otherwise.
 */
int
xwcsneql(const wchar_t *a, const wchar_t *b, const size_t length)
{
	return (!wcsncmp(a, b, length));
}
/*
 * xwsnchr() searches the first n wide characters of ws for wc.  The search
 * is bounded only by n; a null wide character does not end it.
 *
 * Returns a pointer to the first matching wide character, or NULL if none of
 * the n wide characters matches.  When n is 0 nothing is examined and NULL is
 * returned.
 */
wchar_t *
xwsnchr(const wchar_t *ws, const wint_t wc, const size_t n)
{
	const wchar_t *ews = ws + n;

	/* Test the bound before each read so that an empty range is safe. */
	for (; ws < ews; ws++) {
		if (*ws == (wchar_t)wc)
			return ((wchar_t *)ws);
	}

	return (NULL);
}
/*
 * xwcsninv() replaces each of the length wide characters of s beginning at
 * index start with WCHAR_MAX minus its value.  Wide characters outside that
 * range are left untouched.
 */
void
xwcsninv(wchar_t *s, ssize_t start, ssize_t length)
{
	const ssize_t end = start + length;
	ssize_t pos = start;

	while (pos < end) {
		s[pos] = WCHAR_MAX - s[pos];
		pos++;
	}
}
639 #ifdef _LITTLE_ENDIAN
641 xwcsntomsb(wchar_t *s
, ssize_t length
)
645 ASSERT(sizeof (wchar_t) == sizeof (uint32_t));
647 for (i
= 0; i
< length
; i
++, s
++) {
660 #endif /* _LITTLE_ENDIAN */
663 xmemwchar(wchar_t *s
, wchar_t w
, ssize_t length
)
677 xcp(char *dst
, char *src
, off_t size
)
681 size_t chunksize
= 2 * MEGABYTE
;
683 ssize_t nchunks
= size
/ chunksize
;
684 ssize_t lastchunk
= size
% chunksize
;
686 if (dst
== NULL
|| src
== NULL
)
689 if ((fd_in
= open(src
, O_RDONLY
)) < 0)
691 if ((fd_out
= open(dst
, O_RDWR
| O_CREAT
| O_TRUNC
, OUTPUT_MODE
)) < 0)
694 for (i
= 0; i
< nchunks
; i
++) {
695 if ((mm_in
= mmap(NULL
, chunksize
, PROT_READ
, MAP_SHARED
, fd_in
,
696 i
* chunksize
)) == MAP_FAILED
)
699 if (write(fd_out
, mm_in
, chunksize
) != chunksize
)
700 die(EMSG_WRITE
, dst
);
702 (void) munmap(mm_in
, chunksize
);
706 if ((mm_in
= mmap(NULL
, lastchunk
, PROT_READ
, MAP_SHARED
, fd_in
,
707 nchunks
* chunksize
)) == MAP_FAILED
)
710 if (write(fd_out
, mm_in
, lastchunk
) != lastchunk
)
711 die(EMSG_WRITE
, dst
);
713 (void) munmap(mm_in
, lastchunk
);
718 if (close(fd_out
) == -1)
719 die(EMSG_CLOSE
, dst
);
724 warn(const char *format
, ...)
730 (void) fprintf(stderr
, gettext(PNAME_FMT
), pname
);
732 va_start(alist
, format
);
733 (void) vfprintf(stderr
, format
, alist
);
736 if (strrchr(format
, '\n') == NULL
)
737 (void) fprintf(stderr
, gettext(ERRNO_FMT
), strerror(err
));
742 die(const char *format
, ...)
748 (void) fprintf(stderr
, gettext(PNAME_FMT
), pname
);
750 va_start(alist
, format
);
751 (void) vfprintf(stderr
, format
, alist
);
754 if (strrchr(format
, '\n') == NULL
)
755 (void) fprintf(stderr
, gettext(ERRNO_FMT
), strerror(err
));
762 * pprintc() is called only by xdump().
764 #define BYTES_PER_LINE 16
/*
 * pprintc() writes a one-character rendering of c to fp: a blank for any
 * whitespace character, the character itself if it is printable, and a
 * period for everything else.
 */
static void
pprintc(FILE *fp, char c)
{
	const unsigned char uc = (unsigned char)c;
	char shown;

	if (isspace(uc))
		shown = ' ';
	else if (isprint(uc))
		shown = c;
	else
		shown = '.';

	(void) fprintf(fp, "%c", shown);
}
/*
 * pprintwc() is the wide character counterpart of pprintc(): it writes a
 * blank to fp for a whitespace wide character, the character itself if it is
 * printable, and a period for everything else.
 */
static void
pprintwc(FILE *fp, wchar_t c)
{
	if (iswspace(c))
		(void) fprintf(fp, " ");
	else if (iswprint(c))
		/* "%lc" is the standard wide character conversion. */
		(void) fprintf(fp, "%lc", (wint_t)c);
	else
		(void) fprintf(fp, ".");
}
788 * xdump() is used only for debugging purposes.
791 xdump(FILE *fp
, uchar_t
*buf
, size_t bufsize
, int wide
)
795 uchar_t d
[BYTES_PER_LINE
];
797 for (; nc
< bufsize
; buf
++) {
798 d
[nc
% BYTES_PER_LINE
] = *buf
;
799 if (nc
% BYTES_PER_LINE
== 0) {
800 (void) fprintf(fp
, "%08x:", nc
);
802 (void) fprintf(fp
, " %02x", *buf
);
804 if (nc
% BYTES_PER_LINE
== 0) {
805 (void) fprintf(fp
, " ");
807 for (i
= 0; i
< BYTES_PER_LINE
;
808 i
+= sizeof (wchar_t))
809 pprintwc(fp
, *(wchar_t *)(d
+ i
));
811 for (i
= 0; i
< BYTES_PER_LINE
; i
++)
814 (void) fprintf(fp
, "\n");
818 for (i
= nc
% BYTES_PER_LINE
; i
< BYTES_PER_LINE
; i
++)
819 (void) fprintf(fp
, " ");
821 (void) fprintf(fp
, " ");
824 for (i
= 0; i
< nc
% BYTES_PER_LINE
; i
+= sizeof (wchar_t))
825 pprintwc(fp
, *(wchar_t *)(d
+ i
));
827 for (i
= 0; i
< nc
% BYTES_PER_LINE
; i
++)
831 (void) fprintf(fp
, "\n");