dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / usr / src / cmd / sort / common / utility.c
blobbb9e9c743d41790e05afcf401ee516cc4c398a00
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include "utility.h"
29 #include "initialize.h"
30 #include "statistics.h"
31 #include "streams_common.h"
32 #include "streams.h"
/*
 * utility
 *
 * Overview
 *   utility.c contains the general purpose routines used in various locations
 *   throughout sort.  It provides a number of interfaces that maintain local
 *   state relevant to this instance of sort.  We discuss the more significant
 *   of these interfaces below.
 *
 * Output guard
 *   sort is one of the few Unix utilities that is capable of working "in
 *   place"; that is, sort can manipulate an input file and place its output in
 *   a file of the same name safely.  This is handled in this implementation by
 *   the output guard facility.  In the case of an interrupt or other fatal
 *   signal, sort essays to restore the original input file.
 *
 * Temporary file cleanup
 *   Similar to the output guard facility, sort cleans up its temporary files in
 *   the case of interruption (or normal exit, for that matter); this is handled
 *   by registering a list of file pointers for later use by the atexit handler.
 *
 * Temporary filename security
 *   sort protects against "open-through-link" security attacks by verifying
 *   that the selected temporary file name is unused.  If the file name is in
 *   use, the pattern is readjusted until an available name pattern is
 *   discovered.
 *
 * Buffered I/O
 *   sort has a simple buffered I/O facility of its own, to facilitate writing
 *   data in large quantities (particularly for multibyte locales).  cxwrite()
 *   is the base routine, while wxwrite(), which handles multibyte buffers, is
 *   built on top of cxwrite().
 */
68 #define XBUFFER_SIZE (32 * KILOBYTE)
70 #define EXIT_OK 0
71 #define EXIT_FAILURE 1
72 #define EXIT_ERROR 2
73 #define EXIT_INTERNAL 3
75 static int held_fd = -1;
77 static stream_t **cleanup_chain = NULL;
79 static char *output_guard_tempname = NULL;
80 static ssize_t output_guard_size = 0;
81 static char *output_guard_filename = NULL;
82 static int output_guard_copy_complete = 0;
84 static const char *default_tmpdir = "/var/tmp";
85 static const char *default_template = "/stmAAAXXXXXX";
86 static const char *default_template_count = ".00000000";
87 static char *current_tmpdir;
88 static char *current_template;
90 static const char PNAME_FMT[] = "%s: ";
91 static const char ERRNO_FMT[] = ": %s\n";
92 static const char *pname = "sort";
/*
 * swap() exchanges the two pointers stored at a and b, then invokes the
 * swap-count statistics hook.
 *
 * NOTE(review): __S() is defined outside this file; presumably it compiles
 * its argument in only for statistics-enabled builds -- confirm.
 */
void
swap(void **a, void **b)
{
	void *t;

	t = *a;
	*a = *b;
	*b = t;

	__S(stats_incr_swaps());
}
107 * Temporary file name template handling.
109 static void
110 reset_file_template()
112 struct stat s;
114 do {
115 (void) strcpy(current_template, current_tmpdir);
116 (void) strcat(current_template, default_template);
117 (void) mktemp(current_template);
118 (void) strcat(current_template, default_template_count);
119 } while (lstat(current_template, &s) != -1);
123 bump_file_template()
125 struct stat s;
126 int n = strlen(current_template);
127 int i;
129 for (i = n - 1; isdigit((uchar_t)current_template[i]); i--) {
130 current_template[i]++;
131 if (current_template[i] > '9')
132 current_template[i] = '0';
133 else
134 break;
137 if (!isdigit((uchar_t)current_template[i])) {
139 * Template has been exhausted, so reset.
141 reset_file_template();
144 if (lstat(current_template, &s) == 0) {
146 * Our newly bumped template has been anticipated; reset to
147 * avoid possible "link-through" attack.
149 reset_file_template();
152 return (0);
155 void
156 set_file_template(char **T)
158 struct stat s;
159 int check_tmpdir = 0;
161 if (*T != NULL) {
162 current_tmpdir = strdup(*T);
163 check_tmpdir = 1;
164 } else if ((current_tmpdir = getenv("TMPDIR")) != NULL) {
165 check_tmpdir = 1;
166 } else {
167 current_tmpdir = (char *)default_tmpdir;
171 * Check that the temporary directory given exists, and is a directory.
173 if (check_tmpdir) {
174 if (stat(current_tmpdir, &s) != 0) {
175 warn(gettext("cannot stat temporary directory %s"),
176 current_tmpdir);
178 current_tmpdir = (char *)default_tmpdir;
179 } else if (!S_ISDIR(s.st_mode)) {
180 warn(gettext("%s is not a directory; "
181 "using default temporary directory"),
182 current_tmpdir);
184 current_tmpdir = (char *)default_tmpdir;
188 ASSERT(current_tmpdir != NULL);
190 current_template = safe_realloc(NULL, strlen(current_tmpdir)
191 + strlen(default_template) + strlen(default_template_count) + 1);
193 reset_file_template();
196 char *
197 get_file_template()
199 return (current_template);
203 * Output guard routines.
205 void
206 establish_output_guard(sort_t *S)
208 struct stat output_stat;
210 if (S->m_output_to_stdout)
211 return;
213 if (stat(S->m_output_filename, &output_stat) == 0) {
214 stream_t *strp = S->m_input_streams;
216 while (strp != NULL) {
218 * We needn't protect an empty file.
220 if (!(strp->s_status & STREAM_NOTFILE) &&
221 strp->s_dev == output_stat.st_dev &&
222 strp->s_ino == output_stat.st_ino &&
223 strp->s_filesize > 0) {
224 output_guard_filename = S->m_output_filename;
225 output_guard_size = strp->s_filesize;
227 ASSERT(output_guard_filename != NULL);
229 if (bump_file_template() < 0)
230 die(EMSG_TEMPORARY);
232 if ((strp->s_filename = output_guard_tempname =
233 strdup(get_file_template())) == NULL)
234 die(EMSG_ALLOC);
236 xcp(output_guard_tempname,
237 output_guard_filename, output_guard_size);
239 output_guard_copy_complete = 1;
241 return;
243 strp = strp->s_next;
248 void
249 remove_output_guard()
251 if (output_guard_tempname && unlink(output_guard_tempname) == -1)
252 warn(gettext("unable to unlink %s"), output_guard_tempname);
254 output_guard_tempname = NULL;
257 void
258 set_cleanup_chain(stream_t **strp)
260 ASSERT(strp != NULL);
262 cleanup_chain = strp;
266 * atexit_handler() cleans up any temporary files outstanding after a fatal
267 * signal, a call to die() or at exit(). To preserve the input file under low
268 * storage conditions (and both the output file and the temporary files are
269 * directed at the same filesystem), we remove all temporary files but the
270 * output guard first, and then restore the original file. Of course, this is
271 * not foolproof, as another writer may have exhausted storage.
273 void
274 atexit_handler()
276 stream_t *strp;
278 if (cleanup_chain && *cleanup_chain)
279 for (strp = *cleanup_chain; strp != NULL; strp = strp->s_next)
280 stream_unlink_temporary(strp);
282 if (output_guard_tempname) {
283 if (output_guard_copy_complete)
284 xcp(output_guard_filename, output_guard_tempname,
285 output_guard_size);
287 remove_output_guard();
290 __S(stats_display());
/*
 * strtomem() converts a memory size specification to a byte count.
 *
 * S is a non-negative decimal number optionally followed by one unit
 * character: b/B (bytes), k/K, m/M, g/G, t/T (powers of 1024), or % (a
 * percentage, 0-100, of physical memory).  A missing or unrecognized unit is
 * treated as kilobytes.
 *
 * Returns the size in bytes, or 0 if S cannot be parsed, is negative or not
 * a number, is an out-of-range percentage, or does not fit in a size_t.
 */
size_t
strtomem(char *S)
{
	const char *format_str = "%lf%c";
	double val = 0.0;
	char units = 'k';

	/* !(val >= 0) also rejects NaN, which "val < 0" lets through. */
	if (sscanf(S, format_str, &val, &units) < 1 || !(val >= 0))
		return (0);

	if (units == '%') {
		long pages = sysconf(_SC_PHYS_PAGES);
		long pagesize = sysconf(_SC_PAGESIZE);
		size_t phys_total;

		if (val > 100 || pages < 0 || pagesize < 0)
			return (0);

		phys_total = (size_t)pages * (size_t)pagesize;
		val *= phys_total / 100;
	} else {
		switch (units) {
		case 't' : /* terabytes */
		case 'T' :
			val *= 1024;
			/*FALLTHROUGH*/
		case 'g' : /* gigabytes */
		case 'G' :
			val *= 1024;
			/*FALLTHROUGH*/
		case 'm' : /* megabytes */
		case 'M' :
			val *= 1024;
			/*FALLTHROUGH*/
		case 'k' : /* kilobytes */
		case 'K' :
			val *= 1024;
			/*FALLTHROUGH*/
		case 'b' : /* bytes */
		case 'B' :
			break;
		default :
			/*
			 * default is kilobytes
			 */
			val *= 1024;
			break;
		}
	}

	/*
	 * SIZE_MAX may round up when converted to double, so a value equal
	 * to the converted limit must be rejected as well; converting it to
	 * size_t would be undefined.
	 */
	if (val >= (double)SIZE_MAX)
		return (0);

	return ((size_t)val);
}
346 size_t
347 available_memory(size_t mem_limit)
349 size_t phys_avail = sysconf(_SC_AVPHYS_PAGES) * sysconf(_SC_PAGESIZE);
350 size_t avail;
352 if (mem_limit != 0) {
353 #ifdef DEBUG
355 * In the debug case, we want to test the temporary files
356 * handling, so no lower bound on the memory limit is imposed.
358 avail = mem_limit;
359 #else
360 avail = MAX(64 * KILOBYTE, mem_limit);
361 #endif /* DEBUG */
362 } else {
363 avail = MAX(64 * KILOBYTE, MIN(AV_MEM_MULTIPLIER * phys_avail /
364 AV_MEM_DIVISOR, 16 * MEGABYTE));
367 __S(stats_set_available_memory(avail));
369 return (avail);
372 void
373 set_memory_ratio(sort_t *S, int *numerator, int *denominator)
375 if (S->m_c_locale) {
376 *numerator = CHAR_AVG_LINE;
377 *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) +
378 CHAR_AVG_LINE + CHAR_AVG_LINE;
379 return;
382 if (S->m_single_byte_locale) {
383 *numerator = CHAR_AVG_LINE;
384 *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) +
385 CHAR_AVG_LINE + XFRM_MULTIPLIER * CHAR_AVG_LINE;
386 return;
389 *numerator = WCHAR_AVG_LINE;
390 *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) +
391 WCHAR_AVG_LINE + WCHAR_AVG_LINE;
/*
 * safe_realloc() is realloc() that never returns NULL: on allocation failure
 * it calls die().  A size of zero is not permitted.
 */
void *
safe_realloc(void *ptr, size_t sz)
{
	/*
	 * safe_realloc() is not meant as an alternative free() mechanism--we
	 * disallow reallocations to size zero.
	 */
	ASSERT(sz != 0);

	if ((ptr = realloc(ptr, sz)) != NULL)
		return (ptr);

	die(gettext("unable to reallocate buffer"));
	/*NOTREACHED*/
	return (NULL);	/* keep gcc happy */
}
/*
 * safe_free() releases memory obtained from safe_realloc(); it is a plain
 * wrapper around free().
 */
void
safe_free(void *ptr)
{
	free(ptr);
}
417 void *
418 xzmap(void *addr, size_t len, int prot, int flags, off_t off)
420 void *pa;
422 pa = mmap(addr, len, prot, flags | MAP_ANON, -1, off);
423 if (pa == MAP_FAILED)
424 die(gettext("can't mmap anonymous memory"));
426 return (pa);
429 void
430 usage()
432 (void) fprintf(stderr,
433 gettext("usage: %s [-cmu] [-o output] [-T directory] [-S mem]"
434 " [-z recsz]\n\t[-dfiMnr] [-b] [-t char] [-k keydef]"
435 " [+pos1 [-pos2]] files...\n"), CMDNAME);
436 exit(E_USAGE);
440 * hold_file_descriptor() and release_file_descriptor() reserve a single file
441 * descriptor entry for later use. We issue the hold prior to any loop that has
442 * an exit condition based on the receipt of EMFILE from an open() call; once we
443 * have exited, we can release, typically prior to opening a file for output.
445 void
446 hold_file_descriptor()
448 ASSERT(held_fd == -1);
450 if ((held_fd = open("/dev/null", O_RDONLY)) == -1)
451 die(gettext("insufficient available file descriptors\n"));
454 void
455 release_file_descriptor()
457 ASSERT(held_fd != -1);
459 (void) close(held_fd);
460 held_fd = -1;
463 void
464 copy_line_rec(const line_rec_t *a, line_rec_t *b)
466 (void) memcpy(b, a, sizeof (line_rec_t));
/*
 * trip_eof() makes the end-of-file indicator on f reflect whether any input
 * remains: it reads one character and pushes it back.  At end of file the
 * read sets the indicator and the push-back of EOF does nothing.
 */
void
trip_eof(FILE *f)
{
	if (feof(f))
		return;

	(void) ungetc(fgetc(f), f);
}
479 * int cxwrite(int, char *, size_t)
481 * Overview
482 * cxwrite() implements a buffered version of fwrite(ptr, nbytes, 1, .) on
483 * file descriptors. It returns -1 in the case that the write() fails to
484 * write the current buffer contents. cxwrite() must be flushed before being
485 * applied to a new file descriptor.
487 * Return values
488 * 0 on success, -1 on error.
491 cxwrite(int fd, char *ptr, size_t nbytes)
493 static char buffer[XBUFFER_SIZE];
494 static size_t offset = 0;
495 size_t mbytes;
497 if (ptr == NULL) {
498 errno = 0;
499 while (offset -= write(fd, buffer, offset)) {
500 if (errno)
501 break;
504 if (offset)
505 return (-1);
507 return (0);
510 while (nbytes != 0) {
511 if (offset + nbytes > XBUFFER_SIZE)
512 mbytes = XBUFFER_SIZE - offset;
513 else
514 mbytes = nbytes;
516 (void) memcpy(buffer + offset, ptr, mbytes);
517 nbytes -= mbytes;
518 offset += mbytes;
519 ptr += mbytes;
521 if (nbytes) {
522 errno = 0;
523 while (offset -= write(fd, buffer, offset)) {
524 if (errno)
525 break;
528 if (offset)
529 return (-1);
533 return (0);
/*
 * int wxwrite(int, wchar_t *)
 *
 * Overview
 *   wxwrite() implements a buffered write() function for null-terminated wide
 *   character buffers with similar calling semantics to cxwrite().  The wide
 *   string is converted to its multibyte form in a conversion buffer that
 *   grows as needed, and the result is passed to cxwrite().  A NULL ptr
 *   flushes cxwrite()'s buffer to fd.  wxwrite() must be flushed before being
 *   applied to a new file descriptor.
 *
 * Return values
 *   0 on success, -1 if the string cannot be converted or the buffer
 *   contents cannot be written.
 */
int
wxwrite(int fd, wchar_t *ptr)
{
	static char *convert_buffer;
	static size_t convert_bufsize = 1024;
	size_t req_bufsize;

	/* Flush to the caller's descriptor, not to descriptor 0. */
	if (ptr == NULL)
		return (cxwrite(fd, NULL, 0));

	if (convert_buffer == NULL)
		convert_buffer = safe_realloc(NULL, convert_bufsize);

	/*
	 * We use wcstombs(NULL, ., .) to verify that we have an adequate
	 * buffer size for the conversion.  The buffer was converted into wide
	 * character format earlier, so failure is not expected, but
	 * (size_t)-1 must not be mistaken for a length: adding one to it
	 * would wrap the requested buffer size to zero.
	 */
	req_bufsize = wcstombs(NULL, ptr, convert_bufsize);
	if (req_bufsize == (size_t)-1)
		return (-1);

	if (req_bufsize > convert_bufsize) {
		convert_bufsize = req_bufsize + 1;
		convert_buffer = safe_realloc(convert_buffer, convert_bufsize);
	}

	(void) wcstombs(convert_buffer, ptr, convert_bufsize);

	return (cxwrite(fd, convert_buffer, req_bufsize));
}
/*
 * xstreql() returns non-zero if the strings a and b are equal, 0 otherwise.
 */
int
xstreql(const char *a, const char *b)
{
	return (strcmp(a, b) == 0);
}
/*
 * xstrneql() returns non-zero if a and b agree in their first l characters
 * (or up to a shared terminator), 0 otherwise.
 */
int
xstrneql(const char *a, const char *b, const size_t l)
{
	return (strncmp(a, b, l) == 0);
}
/*
 * xstrnchr() searches the first n bytes of S for the character c.  The
 * search is bounded by n alone; a NUL byte does not stop it.
 *
 * Returns a pointer to the first match, or NULL if there is none.  When n is
 * zero nothing is examined and NULL is returned.
 */
char *
xstrnchr(const char *S, const int c, const size_t n)
{
	const char *eS = S + n;

	for (; S < eS; S++) {
		if (*S == (char)c)
			return ((char *)S);
	}

	return (NULL);
}
/*
 * xstrninv() replaces each of the length bytes of s beginning at index start
 * with UCHAR_MAX minus its value.
 */
void
xstrninv(char *s, ssize_t start, ssize_t length)
{
	ssize_t i;

	for (i = start; i < start + length; i++)
		s[i] = UCHAR_MAX - s[i];
}
/*
 * xwcsneql() returns non-zero if the wide strings a and b agree in their
 * first length characters (or up to a shared terminator), 0 otherwise.
 */
int
xwcsneql(const wchar_t *a, const wchar_t *b, const size_t length)
{
	return (wcsncmp(a, b, length) == 0);
}
/*
 * xwsnchr() searches the first n wide characters of ws for wc.  The search
 * is bounded by n alone; a null wide character does not stop it.
 *
 * Returns a pointer to the first match, or NULL if there is none.  When n is
 * zero nothing is examined and NULL is returned.
 */
wchar_t *
xwsnchr(const wchar_t *ws, const wint_t wc, const size_t n)
{
	const wchar_t *ews = ws + n;

	for (; ws < ews; ws++) {
		if (*ws == (wchar_t)wc)
			return ((wchar_t *)ws);
	}

	return (NULL);
}
/*
 * xwcsninv() replaces each of the length wide characters of s beginning at
 * index start with WCHAR_MAX minus its value.
 */
void
xwcsninv(wchar_t *s, ssize_t start, ssize_t length)
{
	ssize_t i;

	for (i = start; i < start + length; i++)
		s[i] = WCHAR_MAX - s[i];
}
#ifdef _LITTLE_ENDIAN
/*
 * xwcsntomsb() reverses the byte order of each of the first length wide
 * characters of s in place.  It is compiled only for little-endian builds
 * and requires wchar_t to be four bytes wide.
 */
void
xwcsntomsb(wchar_t *s, ssize_t length)
{
	ssize_t i;

	ASSERT(sizeof (wchar_t) == sizeof (uint32_t));

	for (i = 0; i < length; i++, s++) {
		char *t = (char *)s;
		char u;

		/* Swap the outer pair of bytes, then the inner pair. */
		u = *t;
		*t = *(t + 3);
		*(t + 3) = u;

		u = *(t + 1);
		*(t + 1) = *(t + 2);
		*(t + 2) = u;
	}
}
#endif /* _LITTLE_ENDIAN */
/*
 * xmemwchar() searches the first length wide characters of s for w.
 *
 * Returns a pointer to the first match, or NULL if there is none or length
 * is not positive.
 *
 * NOTE(review): the loop previously read "while (--i > 0)", which examined
 * only length - 1 elements and so could never match the final one.  Confirm
 * that no caller compensates by passing a length one greater than the
 * buffer size.
 */
wchar_t *
xmemwchar(wchar_t *s, wchar_t w, ssize_t length)
{
	ssize_t i = length;

	while (i-- > 0) {
		if (*s == w)
			return (s);
		s++;
	}

	return (NULL);
}
676 void
677 xcp(char *dst, char *src, off_t size)
679 int fd_in, fd_out;
680 void *mm_in;
681 size_t chunksize = 2 * MEGABYTE;
682 int i;
683 ssize_t nchunks = size / chunksize;
684 ssize_t lastchunk = size % chunksize;
686 if (dst == NULL || src == NULL)
687 return;
689 if ((fd_in = open(src, O_RDONLY)) < 0)
690 die(EMSG_OPEN, src);
691 if ((fd_out = open(dst, O_RDWR | O_CREAT | O_TRUNC, OUTPUT_MODE)) < 0)
692 die(EMSG_OPEN, dst);
694 for (i = 0; i < nchunks; i++) {
695 if ((mm_in = mmap(NULL, chunksize, PROT_READ, MAP_SHARED, fd_in,
696 i * chunksize)) == MAP_FAILED)
697 die(EMSG_MMAP, src);
699 if (write(fd_out, mm_in, chunksize) != chunksize)
700 die(EMSG_WRITE, dst);
702 (void) munmap(mm_in, chunksize);
705 if (lastchunk) {
706 if ((mm_in = mmap(NULL, lastchunk, PROT_READ, MAP_SHARED, fd_in,
707 nchunks * chunksize)) == MAP_FAILED)
708 die(EMSG_MMAP, src);
710 if (write(fd_out, mm_in, lastchunk) != lastchunk)
711 die(EMSG_WRITE, dst);
713 (void) munmap(mm_in, lastchunk);
716 (void) close(fd_in);
718 if (close(fd_out) == -1)
719 die(EMSG_CLOSE, dst);
722 /*PRINTFLIKE1*/
723 void
724 warn(const char *format, ...)
726 int err = errno;
727 va_list alist;
729 if (pname != NULL)
730 (void) fprintf(stderr, gettext(PNAME_FMT), pname);
732 va_start(alist, format);
733 (void) vfprintf(stderr, format, alist);
734 va_end(alist);
736 if (strrchr(format, '\n') == NULL)
737 (void) fprintf(stderr, gettext(ERRNO_FMT), strerror(err));
740 /*PRINTFLIKE1*/
741 void
742 die(const char *format, ...)
744 int err = errno;
745 va_list alist;
747 if (pname != NULL)
748 (void) fprintf(stderr, gettext(PNAME_FMT), pname);
750 va_start(alist, format);
751 (void) vfprintf(stderr, format, alist);
752 va_end(alist);
754 if (strrchr(format, '\n') == NULL)
755 (void) fprintf(stderr, gettext(ERRNO_FMT), strerror(err));
757 exit(E_ERROR);
760 #ifdef DEBUG
762 * pprintc() is called only by xdump().
764 #define BYTES_PER_LINE 16
765 static void
766 pprintc(FILE *fp, char c)
768 if (isspace((uchar_t)c))
769 (void) fprintf(fp, " ");
770 else if (isprint((uchar_t)c))
771 (void) fprintf(fp, "%c", c);
772 else
773 (void) fprintf(fp, ".");
/*
 * pprintwc() is the wide character counterpart of pprintc(): it prints c to
 * fp as a space if it is whitespace, as itself if it is printable, and as
 * "." otherwise.  The standard "%lc" conversion is used, which takes a
 * wint_t argument.
 */
static void
pprintwc(FILE *fp, wchar_t c)
{
	if (iswspace(c))
		(void) fprintf(fp, " ");
	else if (iswprint(c))
		(void) fprintf(fp, "%lc", (wint_t)c);
	else
		(void) fprintf(fp, ".");
}
788 * xdump() is used only for debugging purposes.
790 void
791 xdump(FILE *fp, uchar_t *buf, size_t bufsize, int wide)
793 int i;
794 size_t nc = 0;
795 uchar_t d[BYTES_PER_LINE];
797 for (; nc < bufsize; buf++) {
798 d[nc % BYTES_PER_LINE] = *buf;
799 if (nc % BYTES_PER_LINE == 0) {
800 (void) fprintf(fp, "%08x:", nc);
802 (void) fprintf(fp, " %02x", *buf);
803 nc++;
804 if (nc % BYTES_PER_LINE == 0) {
805 (void) fprintf(fp, " ");
806 if (wide) {
807 for (i = 0; i < BYTES_PER_LINE;
808 i += sizeof (wchar_t))
809 pprintwc(fp, *(wchar_t *)(d + i));
810 } else {
811 for (i = 0; i < BYTES_PER_LINE; i++)
812 pprintc(fp, d[i]);
814 (void) fprintf(fp, "\n");
818 for (i = nc % BYTES_PER_LINE; i < BYTES_PER_LINE; i++)
819 (void) fprintf(fp, " ");
821 (void) fprintf(fp, " ");
823 if (wide) {
824 for (i = 0; i < nc % BYTES_PER_LINE; i += sizeof (wchar_t))
825 pprintwc(fp, *(wchar_t *)(d + i));
826 } else {
827 for (i = 0; i < nc % BYTES_PER_LINE; i++)
828 pprintc(fp, d[i]);
831 (void) fprintf(fp, "\n");
833 #endif /* DEBUG */