8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / cmd / sort / common / utility.c
blob045e723b3ed5c0d445c47d04035dbdf01e999954
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
27 #pragma ident "%Z%%M% %I% %E% SMI"
29 #include "utility.h"
31 #include "initialize.h"
32 #include "statistics.h"
33 #include "streams_common.h"
34 #include "streams.h"
/*
 * utility
 *
 * Overview
 *   utility.c contains the general purpose routines used in various locations
 *   throughout sort. It provides a number of interfaces that maintain local
 *   state relevant to this instance of sort. We discuss the more significant
 *   of these interfaces below.
 *
 * Output guard
 *   sort is one of the few Unix utilities that is capable of working "in
 *   place"; that is, sort can manipulate an input file and place its output in
 *   a file of the same name safely. This is handled in this implementation by
 *   the output guard facility. In the case of an interrupt or other fatal
 *   signal, sort essays to restore the original input file.
 *
 * Temporary file cleanup
 *   Similar to the output guard facility, sort cleans up its temporary files in
 *   the case of interruption (or normal exit, for that matter); this is handled
 *   by registering a list of file pointers for later use by the atexit handler.
 *
 * Temporary filename security
 *   sort protects against "open-through-link" security attacks by verifying
 *   that the selected temporary file name is unused. If the file name is in
 *   use, the pattern is readjusted until an available name pattern is
 *   discovered.
 *
 * Buffered I/O
 *   sort has a simple buffered I/O facility of its own, to facilitate writing
 *   data in large quantities (particularly for multibyte locales). cxwrite()
 *   is the base routine, while wxwrite(), which handles multibyte buffers, is
 *   built on top of cxwrite().
 */
70 #define XBUFFER_SIZE (32 * KILOBYTE)
72 #define EXIT_OK 0
73 #define EXIT_FAILURE 1
74 #define EXIT_ERROR 2
75 #define EXIT_INTERNAL 3
77 static int held_fd = -1;
79 static stream_t **cleanup_chain = NULL;
81 static char *output_guard_tempname = NULL;
82 static ssize_t output_guard_size = 0;
83 static char *output_guard_filename = NULL;
84 static int output_guard_copy_complete = 0;
86 static const char *default_tmpdir = "/var/tmp";
87 static const char *default_template = "/stmAAAXXXXXX";
88 static const char *default_template_count = ".00000000";
89 static char *current_tmpdir;
90 static char *current_template;
92 static const char PNAME_FMT[] = "%s: ";
93 static const char ERRNO_FMT[] = ": %s\n";
94 static const char *pname = "sort";
/*
 * swap() exchanges the two pointers referenced by a and b, then invokes the
 * stats_incr_swaps() hook through the __S() wrapper.
 */
void
swap(void **a, void **b)
{
	void *t;

	t = *a;
	*a = *b;
	*b = t;

	__S(stats_incr_swaps());
}
109 * Temporary file name template handling.
111 static void
112 reset_file_template()
114 struct stat s;
116 do {
117 (void) strcpy(current_template, current_tmpdir);
118 (void) strcat(current_template, default_template);
119 (void) mktemp(current_template);
120 (void) strcat(current_template, default_template_count);
121 } while (lstat(current_template, &s) != -1);
125 bump_file_template()
127 struct stat s;
128 int n = strlen(current_template);
129 int i;
131 for (i = n - 1; isdigit((uchar_t)current_template[i]); i--) {
132 current_template[i]++;
133 if (current_template[i] > '9')
134 current_template[i] = '0';
135 else
136 break;
139 if (!isdigit((uchar_t)current_template[i])) {
141 * Template has been exhausted, so reset.
143 reset_file_template();
146 if (lstat(current_template, &s) == 0) {
148 * Our newly bumped template has been anticipated; reset to
149 * avoid possible "link-through" attack.
151 reset_file_template();
154 return (0);
157 void
158 set_file_template(char **T)
160 struct stat s;
161 int check_tmpdir = 0;
163 if (*T != NULL) {
164 current_tmpdir = strdup(*T);
165 check_tmpdir = 1;
166 } else if ((current_tmpdir = getenv("TMPDIR")) != NULL) {
167 check_tmpdir = 1;
168 } else {
169 current_tmpdir = (char *)default_tmpdir;
173 * Check that the temporary directory given exists, and is a directory.
175 if (check_tmpdir) {
176 if (stat(current_tmpdir, &s) != 0) {
177 warn(gettext("cannot stat temporary directory %s"),
178 current_tmpdir);
180 current_tmpdir = (char *)default_tmpdir;
181 } else if (!S_ISDIR(s.st_mode)) {
182 warn(gettext("%s is not a directory; "
183 "using default temporary directory"),
184 current_tmpdir);
186 current_tmpdir = (char *)default_tmpdir;
190 ASSERT(current_tmpdir != NULL);
192 current_template = safe_realloc(NULL, strlen(current_tmpdir)
193 + strlen(default_template) + strlen(default_template_count) + 1);
195 reset_file_template();
198 char *
199 get_file_template()
201 return (current_template);
205 * Output guard routines.
207 void
208 establish_output_guard(sort_t *S)
210 struct stat output_stat;
212 if (S->m_output_to_stdout)
213 return;
215 if (stat(S->m_output_filename, &output_stat) == 0) {
216 stream_t *strp = S->m_input_streams;
218 while (strp != NULL) {
220 * We needn't protect an empty file.
222 if (!(strp->s_status & STREAM_NOTFILE) &&
223 strp->s_dev == output_stat.st_dev &&
224 strp->s_ino == output_stat.st_ino &&
225 strp->s_filesize > 0) {
226 output_guard_filename = S->m_output_filename;
227 output_guard_size = strp->s_filesize;
229 ASSERT(output_guard_filename != NULL);
231 if (bump_file_template() < 0)
232 die(EMSG_TEMPORARY);
234 if ((strp->s_filename = output_guard_tempname =
235 strdup(get_file_template())) == NULL)
236 die(EMSG_ALLOC);
238 xcp(output_guard_tempname,
239 output_guard_filename, output_guard_size);
241 output_guard_copy_complete = 1;
243 return;
245 strp = strp->s_next;
250 void
251 remove_output_guard()
253 if (output_guard_tempname && unlink(output_guard_tempname) == -1)
254 warn(gettext("unable to unlink %s"), output_guard_tempname);
256 output_guard_tempname = NULL;
259 void
260 set_cleanup_chain(stream_t **strp)
262 ASSERT(strp != NULL);
264 cleanup_chain = strp;
268 * atexit_handler() cleans up any temporary files outstanding after a fatal
269 * signal, a call to die() or at exit(). To preserve the input file under low
270 * storage conditions (and both the output file and the temporary files are
271 * directed at the same filesystem), we remove all temporary files but the
272 * output guard first, and then restore the original file. Of course, this is
273 * not foolproof, as another writer may have exhausted storage.
275 void
276 atexit_handler()
278 stream_t *strp;
280 if (cleanup_chain && *cleanup_chain)
281 for (strp = *cleanup_chain; strp != NULL; strp = strp->s_next)
282 stream_unlink_temporary(strp);
284 if (output_guard_tempname) {
285 if (output_guard_copy_complete)
286 xcp(output_guard_filename, output_guard_tempname,
287 output_guard_size);
289 remove_output_guard();
292 __S(stats_display());
/*
 * strtomem() converts a memory size specification to a byte count.
 *
 * S is a non-negative decimal number optionally followed by a unit character:
 * b/B bytes, k/K kilobytes, m/M megabytes, g/G gigabytes, t/T terabytes, or
 * '%' for a percentage (0-100) of physical memory as reported by sysconf().
 * A missing or unrecognized unit is treated as kilobytes.
 *
 * Return values
 *   The size in bytes, or 0 if S cannot be parsed, is negative or not a
 *   number, is a percentage above 100, or does not fit in a size_t.
 */
size_t
strtomem(char *S)
{
	const char *format_str = "%lf%c";
	double val = 0.0;
	char units = 'k';
	size_t phys_total = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);

	/* !(val >= 0) rejects NaN as well as negative values. */
	if (sscanf(S, format_str, &val, &units) < 1 || !(val >= 0))
		return (0);

	switch (units) {
	case '%' :
		if (val > 100)
			return (0);
		val *= phys_total / 100;
		break;
	case 't' : /* terabytes */
	case 'T' :
		val *= 1024;
		/*FALLTHROUGH*/
	case 'g' : /* gigabytes */
	case 'G' :
		val *= 1024;
		/*FALLTHROUGH*/
	case 'm' : /* megabytes */
	case 'M' :
		val *= 1024;
		/*FALLTHROUGH*/
	case 'k' : /* kilobytes */
	case 'K' :
		val *= 1024;
		/*FALLTHROUGH*/
	case 'b' : /* bytes */
	case 'B' :
		break;
	default :
		/*
		 * default is kilobytes
		 */
		val *= 1024;
		break;
	}

	/*
	 * SIZE_MAX itself is not exactly representable as a double when
	 * size_t is 64 bits wide, so compare against SIZE_MAX + 1 computed
	 * exactly as a power of two; anything at or above it would make the
	 * conversion below undefined.
	 */
	if (val >= 2.0 * (double)(SIZE_MAX / 2 + 1))
		return (0);

	return ((size_t)val);
}
348 size_t
349 available_memory(size_t mem_limit)
351 size_t phys_avail = sysconf(_SC_AVPHYS_PAGES) * sysconf(_SC_PAGESIZE);
352 size_t avail;
354 if (mem_limit != 0) {
355 #ifdef DEBUG
357 * In the debug case, we want to test the temporary files
358 * handling, so no lower bound on the memory limit is imposed.
360 avail = mem_limit;
361 #else
362 avail = MAX(64 * KILOBYTE, mem_limit);
363 #endif /* DEBUG */
364 } else {
365 avail = MAX(64 * KILOBYTE, MIN(AV_MEM_MULTIPLIER * phys_avail /
366 AV_MEM_DIVISOR, 16 * MEGABYTE));
369 __S(stats_set_available_memory(avail));
371 return (avail);
374 void
375 set_memory_ratio(sort_t *S, int *numerator, int *denominator)
377 if (S->m_c_locale) {
378 *numerator = CHAR_AVG_LINE;
379 *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) +
380 CHAR_AVG_LINE + CHAR_AVG_LINE;
381 return;
384 if (S->m_single_byte_locale) {
385 *numerator = CHAR_AVG_LINE;
386 *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) +
387 CHAR_AVG_LINE + XFRM_MULTIPLIER * CHAR_AVG_LINE;
388 return;
391 *numerator = WCHAR_AVG_LINE;
392 *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) +
393 WCHAR_AVG_LINE + WCHAR_AVG_LINE;
/*
 * safe_realloc() is a realloc() wrapper that calls die() instead of returning
 * NULL when the allocation fails.  sz must be non-zero.
 */
void *
safe_realloc(void *ptr, size_t sz)
{
	/*
	 * safe_realloc() is not meant as an alternative free() mechanism--we
	 * disallow reallocations to size zero.
	 */
	ASSERT(sz != 0);

	if ((ptr = realloc(ptr, sz)) != NULL)
		return (ptr);

	die(gettext("unable to reallocate buffer"));
	/*NOTREACHED*/
	return (NULL);	/* keep gcc happy */
}
/*
 * safe_free() releases ptr; a NULL ptr is accepted and ignored, which free()
 * already guarantees.
 */
void
safe_free(void *ptr)
{
	free(ptr);
}
420 void *
421 xzmap(void *addr, size_t len, int prot, int flags, off_t off)
423 void *pa;
425 pa = mmap(addr, len, prot, flags | MAP_ANON, -1, off);
426 if (pa == MAP_FAILED)
427 die(gettext("can't mmap anonymous memory"));
429 return (pa);
432 void
433 usage()
435 (void) fprintf(stderr,
436 gettext("usage: %s [-cmu] [-o output] [-T directory] [-S mem]"
437 " [-z recsz]\n\t[-dfiMnr] [-b] [-t char] [-k keydef]"
438 " [+pos1 [-pos2]] files...\n"), CMDNAME);
439 exit(E_USAGE);
443 * hold_file_descriptor() and release_file_descriptor() reserve a single file
444 * descriptor entry for later use. We issue the hold prior to any loop that has
445 * an exit condition based on the receipt of EMFILE from an open() call; once we
446 * have exited, we can release, typically prior to opening a file for output.
448 void
449 hold_file_descriptor()
451 ASSERT(held_fd == -1);
453 if ((held_fd = open("/dev/null", O_RDONLY)) == -1)
454 die(gettext("insufficient available file descriptors\n"));
457 void
458 release_file_descriptor()
460 ASSERT(held_fd != -1);
462 (void) close(held_fd);
463 held_fd = -1;
466 void
467 copy_line_rec(const line_rec_t *a, line_rec_t *b)
469 (void) memcpy(b, a, sizeof (line_rec_t));
/*
 * trip_eof() forces the end-of-file indicator of f to reflect whether any
 * input remains: one character is read and, if one was obtained, pushed
 * straight back so that the stream position is unchanged.
 */
void
trip_eof(FILE *f)
{
	int c;

	if (feof(f))
		return;

	/* Reading at end of input is what sets the EOF indicator. */
	if ((c = fgetc(f)) != EOF)
		(void) ungetc(c, f);
}
482 * int cxwrite(int, char *, size_t)
484 * Overview
485 * cxwrite() implements a buffered version of fwrite(ptr, nbytes, 1, .) on
486 * file descriptors. It returns -1 in the case that the write() fails to
487 * write the current buffer contents. cxwrite() must be flushed before being
488 * applied to a new file descriptor.
490 * Return values
491 * 0 on success, -1 on error.
494 cxwrite(int fd, char *ptr, size_t nbytes)
496 static char buffer[XBUFFER_SIZE];
497 static size_t offset = 0;
498 size_t mbytes;
500 if (ptr == NULL) {
501 errno = 0;
502 while (offset -= write(fd, buffer, offset)) {
503 if (errno)
504 break;
507 if (offset)
508 return (-1);
510 return (0);
513 while (nbytes != 0) {
514 if (offset + nbytes > XBUFFER_SIZE)
515 mbytes = XBUFFER_SIZE - offset;
516 else
517 mbytes = nbytes;
519 (void) memcpy(buffer + offset, ptr, mbytes);
520 nbytes -= mbytes;
521 offset += mbytes;
522 ptr += mbytes;
524 if (nbytes) {
525 errno = 0;
526 while (offset -= write(fd, buffer, offset)) {
527 if (errno)
528 break;
531 if (offset)
532 return (-1);
536 return (0);
/*
 * int wxwrite(int, wchar_t *)
 *
 * Overview
 *   wxwrite() implements a buffered write() function for null-terminated wide
 *   character buffers with similar calling semantics to cxwrite().  It returns
 *   -1 in the case that it fails to write the current buffer contents.
 *   wxwrite() must be flushed before being applied to a new file descriptor; a
 *   flush is requested by passing a NULL ptr.
 *
 * Return values
 *   0 on success, -1 on error (including a wide string that cannot be
 *   converted to multibyte form).
 */
int
wxwrite(int fd, wchar_t *ptr)
{
	static char *convert_buffer;
	static size_t convert_bufsize = 1024;
	size_t req_bufsize;

	/* Flush request: drain cxwrite()'s buffer to this descriptor. */
	if (ptr == NULL)
		return (cxwrite(fd, NULL, 0));

	if (convert_buffer == NULL)
		convert_buffer = safe_realloc(NULL, convert_bufsize);

	/*
	 * We use wcstombs(NULL, ., .) to verify that we have an adequate
	 * buffer size for the conversion.  The buffer was converted into wide
	 * character format earlier, so conversion back is expected to succeed,
	 * but a failure is still reported rather than used as a size.
	 */
	req_bufsize = wcstombs(NULL, ptr, convert_bufsize);
	if (req_bufsize == (size_t)-1)
		return (-1);

	if (req_bufsize > convert_bufsize) {
		convert_bufsize = req_bufsize + 1;
		convert_buffer = safe_realloc(convert_buffer, convert_bufsize);
	}

	(void) wcstombs(convert_buffer, ptr, convert_bufsize);

	return (cxwrite(fd, convert_buffer, req_bufsize));
}
/*
 * xstreql() returns non-zero if the strings a and b are equal, 0 otherwise.
 */
int
xstreql(const char *a, const char *b)
{
	return (strcmp(a, b) == 0);
}
/*
 * xstrneql() returns non-zero if the first l characters of the strings a and
 * b are equal (comparison stops at a terminating null), 0 otherwise.
 */
int
xstrneql(const char *a, const char *b, const size_t l)
{
	return (strncmp(a, b, l) == 0);
}
/*
 * xstrnchr() locates the first occurrence of the character c within the
 * first n bytes of S.  Null bytes receive no special treatment.
 *
 * Return values
 *   A pointer to the matching byte, or NULL if none of the n bytes match
 *   (in particular, whenever n is 0).
 */
char *
xstrnchr(const char *S, const int c, const size_t n)
{
	return ((char *)memchr(S, c, n));
}
/*
 * xstrninv() replaces each of the length bytes of s beginning at index start
 * with UCHAR_MAX minus its value.
 */
void
xstrninv(char *s, ssize_t start, ssize_t length)
{
	ssize_t i;

	for (i = start; i < start + length; i++)
		s[i] = UCHAR_MAX - s[i];
}
/*
 * xwcsneql() returns non-zero if the first length wide characters of a and b
 * are equal (comparison stops at a terminating null), 0 otherwise.
 */
int
xwcsneql(const wchar_t *a, const wchar_t *b, const size_t length)
{
	return (wcsncmp(a, b, length) == 0);
}
/*
 * xwsnchr() locates the first occurrence of the wide character wc within the
 * first n elements of ws.  Null wide characters receive no special treatment.
 *
 * Return values
 *   A pointer to the matching element, or NULL if none of the n elements
 *   match (in particular, whenever n is 0).
 */
wchar_t *
xwsnchr(const wchar_t *ws, const wint_t wc, const size_t n)
{
	return ((wchar_t *)wmemchr(ws, (wchar_t)wc, n));
}
/*
 * xwcsninv() replaces each of the length wide characters of s beginning at
 * index start with WCHAR_MAX minus its value.
 */
void
xwcsninv(wchar_t *s, ssize_t start, ssize_t length)
{
	ssize_t i;

	for (i = start; i < start + length; i++)
		s[i] = WCHAR_MAX - s[i];
}
#ifdef _LITTLE_ENDIAN
/*
 * xwcsntomsb() reverses, in place, the byte order of each of the first length
 * wide characters of s.  It is compiled only on little-endian targets and
 * requires wchar_t to be four bytes wide.
 */
void
xwcsntomsb(wchar_t *s, ssize_t length)
{
	ssize_t i;

	ASSERT(sizeof (wchar_t) == sizeof (uint32_t));

	for (i = 0; i < length; i++, s++) {
		char *t = (char *)s;
		char u;

		/* Exchange the outer pair of bytes ... */
		u = *t;
		*t = *(t + 3);
		*(t + 3) = u;

		/* ... then the inner pair. */
		u = *(t + 1);
		*(t + 1) = *(t + 2);
		*(t + 2) = u;
	}
}
#endif /* _LITTLE_ENDIAN */
/*
 * xmemwchar() locates the first occurrence of the wide character w within the
 * first length elements of s.  All length elements are examined, including
 * the last one.
 *
 * Return values
 *   A pointer to the matching element, or NULL if there is no match or
 *   length is not positive.
 */
wchar_t *
xmemwchar(wchar_t *s, wchar_t w, ssize_t length)
{
	if (length <= 0)
		return (NULL);

	return (wmemchr(s, w, (size_t)length));
}
679 void
680 xcp(char *dst, char *src, off_t size)
682 int fd_in, fd_out;
683 void *mm_in;
684 size_t chunksize = 2 * MEGABYTE;
685 int i;
686 ssize_t nchunks = size / chunksize;
687 ssize_t lastchunk = size % chunksize;
689 if (dst == NULL || src == NULL)
690 return;
692 if ((fd_in = open(src, O_RDONLY)) < 0)
693 die(EMSG_OPEN, src);
694 if ((fd_out = open(dst, O_RDWR | O_CREAT | O_TRUNC, OUTPUT_MODE)) < 0)
695 die(EMSG_OPEN, dst);
697 for (i = 0; i < nchunks; i++) {
698 if ((mm_in = mmap(0, chunksize, PROT_READ, MAP_SHARED, fd_in,
699 i * chunksize)) == MAP_FAILED)
700 die(EMSG_MMAP, src);
702 if (write(fd_out, mm_in, chunksize) != chunksize)
703 die(EMSG_WRITE, dst);
705 (void) munmap(mm_in, chunksize);
708 if (lastchunk) {
709 if ((mm_in = mmap(0, lastchunk, PROT_READ, MAP_SHARED, fd_in,
710 nchunks * chunksize)) == MAP_FAILED)
711 die(EMSG_MMAP, src);
713 if (write(fd_out, mm_in, lastchunk) != lastchunk)
714 die(EMSG_WRITE, dst);
716 (void) munmap(mm_in, lastchunk);
719 (void) close(fd_in);
721 if (close(fd_out) == -1)
722 die(EMSG_CLOSE, dst);
725 /*PRINTFLIKE1*/
726 void
727 warn(const char *format, ...)
729 int err = errno;
730 va_list alist;
732 if (pname != NULL)
733 (void) fprintf(stderr, gettext(PNAME_FMT), pname);
735 va_start(alist, format);
736 (void) vfprintf(stderr, format, alist);
737 va_end(alist);
739 if (strrchr(format, '\n') == NULL)
740 (void) fprintf(stderr, gettext(ERRNO_FMT), strerror(err));
743 /*PRINTFLIKE1*/
744 void
745 die(const char *format, ...)
747 int err = errno;
748 va_list alist;
750 if (pname != NULL)
751 (void) fprintf(stderr, gettext(PNAME_FMT), pname);
753 va_start(alist, format);
754 (void) vfprintf(stderr, format, alist);
755 va_end(alist);
757 if (strrchr(format, '\n') == NULL)
758 (void) fprintf(stderr, gettext(ERRNO_FMT), strerror(err));
760 exit(E_ERROR);
763 #ifdef DEBUG
765 * pprintc() is called only by xdump().
767 #define BYTES_PER_LINE 16
768 static void
769 pprintc(FILE *fp, char c)
771 if (isspace((uchar_t)c))
772 (void) fprintf(fp, " ");
773 else if (isprint((uchar_t)c))
774 (void) fprintf(fp, "%c", c);
775 else
776 (void) fprintf(fp, ".");
/*
 * pprintwc() is the wide character counterpart of pprintc(): it prints c to
 * fp as a space if it is white space, as itself if it is printable, and as
 * '.' otherwise.
 */
static void
pprintwc(FILE *fp, wchar_t c)
{
	if (iswspace(c))
		(void) fprintf(fp, " ");
	else if (iswprint(c))
		(void) fprintf(fp, "%wc", c);
	else
		(void) fprintf(fp, ".");
}
791 * xdump() is used only for debugging purposes.
793 void
794 xdump(FILE *fp, uchar_t *buf, size_t bufsize, int wide)
796 int i;
797 size_t nc = 0;
798 uchar_t d[BYTES_PER_LINE];
800 for (; nc < bufsize; buf++) {
801 d[nc % BYTES_PER_LINE] = *buf;
802 if (nc % BYTES_PER_LINE == 0) {
803 (void) fprintf(fp, "%08x:", nc);
805 (void) fprintf(fp, " %02x", *buf);
806 nc++;
807 if (nc % BYTES_PER_LINE == 0) {
808 (void) fprintf(fp, " ");
809 if (wide) {
810 for (i = 0; i < BYTES_PER_LINE;
811 i += sizeof (wchar_t))
812 pprintwc(fp, *(wchar_t *)(d + i));
813 } else {
814 for (i = 0; i < BYTES_PER_LINE; i++)
815 pprintc(fp, d[i]);
817 (void) fprintf(fp, "\n");
821 for (i = nc % BYTES_PER_LINE; i < BYTES_PER_LINE; i++)
822 (void) fprintf(fp, " ");
824 (void) fprintf(fp, " ");
826 if (wide) {
827 for (i = 0; i < nc % BYTES_PER_LINE; i += sizeof (wchar_t))
828 pprintwc(fp, *(wchar_t *)(d + i));
829 } else {
830 for (i = 0; i < nc % BYTES_PER_LINE; i++)
831 pprintc(fp, d[i]);
834 (void) fprintf(fp, "\n");
836 #endif /* DEBUG */