8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / cmd / od / od.c
blob30e140ba96fe6d18677b01df7a02c83883d10e61
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
 */

/*
 * od - octal dump.  Not really just octal anymore; read the POSIX
 * specification for it -- it's more complex than you think!
 *
 * NB: We followed the POSIX semantics fairly strictly, where the
 * legacy code's behavior was in conflict.  In many cases the legacy
 * Solaris code was so completely broken as to be completely unusable.
 * (For example, the long double support was broken beyond
 * imagination!)  Note that GNU coreutils violates POSIX in a few
 * interesting ways, such as changing the numbering of the addresses
 * when skipping.  (Address starts should always be at 0, according to
 * the sample output in the Open Group man page.)
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <string.h>
#include <limits.h>
#include <err.h>
#include <wchar.h>
#include <locale.h>
#include <unistd.h>
#include <sys/stat.h>

/* Shorthand used to mark and look up translatable messages. */
#define	_(x)	gettext(x)

/* Message catalog name used when the build does not provide one. */
#ifndef	TEXT_DOMAIN
#define	TEXT_DOMAIN	"SYS_TEST"
#endif

/*
 * Address format.  afmt is the printf format used for the address that
 * starts the first output line of each block; cfmt is the filler that
 * starts every additional line of the same block (one per extra output
 * format).  The default address is seven digits wide, so the filler is
 * seven spaces to keep the data columns aligned.
 */
static const char *afmt = "%07llo";
static const char *cfmt = "       ";

static FILE *input = NULL;	/* stream currently being read */
static size_t lcm = 1;		/* least common multiple of item widths */
static size_t blocksize = 16;	/* bytes shown per output line */
static int numfiles = 0;	/* number of entries in files[] */
static int curfile = 0;		/* index of the next file to open */
static char **files = NULL;	/* input file names, NULL for stdin only */
static off_t limit = -1;	/* bytes left to dump, -1 means no limit */

/*
 * Ring buffer holding the input bytes.  It's always a power of 2 in
 * size, so that wrap-around is a cheap mask operation rather than a
 * modulo.
 *
 * Sizing: data is processed one block at a time (one block == one
 * line of od output), and three blocks must be resident at once:
 *
 *   - the current block;
 *   - one block of lookahead, needed to decode multibyte characters;
 *   - one block of look-behind, so that a line identical to the one
 *     already printed can be suppressed.
 *
 * The block size is determined by the least common multiple of the
 * data items being displayed.  Usually it will be 16, but sometimes
 * it is 24 (when 12-byte long doubles are presented).
 *
 * The data buffer is allocated via memalign to make sure it is
 * properly aligned.
 */
typedef struct buffer {
	/* data buffer */
	char	*data;
	/* producer index */
	int	prod;
	/* consumer index */
	int	cons;
	/* buffer size - 1, wraparound index */
	int	mask;
	/* total bytes avail */
	int	navail;
} buffer_t;

88 * This structure is used to provide information on a specific output
89 * format. We link them together in a list representing the output
90 * formats that the user has selected.
92 typedef struct output {
93 int width; /* bytes consumed per call */
94 void (*func)(buffer_t *, int); /* output function */
95 struct output *next; /* link node */
96 } output_t;
/*
 * Specifiers
 *
 * Short names for the item types that can be dumped.  These names are
 * pasted into identifiers by the DECL_GET and DECL_OUT macros, so each
 * must be a single token.
 *
 * s8 is explicitly "signed char": the signedness of plain "char" is
 * implementation-defined, and signed byte output must show negative
 * values on every platform.
 */

typedef unsigned char		u8;
typedef unsigned short		u16;
typedef unsigned int		u32;
typedef unsigned long long	u64;
typedef signed char		s8;
typedef short			s16;
typedef int			s32;
typedef long long		s64;
typedef float			fF;
typedef double			fD;
typedef long double		fL;

/*
 * Print the command synopsis on standard error and terminate with
 * exit status 1.  Does not return.
 */
static void
usage(void)
{
	const char *synopsis = _("usage: od [-bcCdDfFoOsSvxX] "
	    "[-t types ]... [-A base] [-j skip] [-N count] [file]...\n");

	(void) fprintf(stderr, synopsis);
	exit(1);
}

122 #define DECL_GET(typ) \
123 static typ \
124 get_ ## typ(buffer_t *b, int index) \
126 typ val = *(typ *)(void *)(b->data + index); \
127 return (val); \
129 DECL_GET(u8)
130 DECL_GET(u16)
131 DECL_GET(u32)
132 DECL_GET(u64)
133 DECL_GET(s8)
134 DECL_GET(s16)
135 DECL_GET(s32)
136 DECL_GET(s64)
137 DECL_GET(fF)
138 DECL_GET(fD)
139 DECL_GET(fL)
141 #define DECL_OUT(nm, typ, fmt) \
142 static void \
143 do_ ## nm(buffer_t *buf, int index) \
145 typ v = get_ ## typ(buf, index); \
146 (void) printf(fmt, v); \
149 static output_t output_ ## nm = { \
150 sizeof (typ), do_ ## nm \
153 DECL_OUT(oct_b, u8, " %03o")
154 DECL_OUT(oct_w, u16, " %06ho")
155 DECL_OUT(oct_d, u32, " %011o")
156 DECL_OUT(oct_q, u64, " %022llo")
157 DECL_OUT(dec_b, u8, " %03u")
158 DECL_OUT(dec_w, u16, " %05hu")
159 DECL_OUT(dec_d, u32, " %010u")
160 DECL_OUT(dec_q, u64, " %020llu")
161 DECL_OUT(sig_b, s8, " %03d")
162 DECL_OUT(sig_w, s16, " %6.05hd")
163 DECL_OUT(sig_d, s32, " %11.010d")
164 DECL_OUT(sig_q, s64, " %20.019lld")
165 DECL_OUT(hex_b, u8, " %02x")
166 DECL_OUT(hex_w, u16, " %04hx")
167 DECL_OUT(hex_d, s32, " %08x")
168 DECL_OUT(hex_q, s64, " %016llx")
169 DECL_OUT(float, fF, " %14.7e")
170 DECL_OUT(double, fD, " %21.14e")
171 DECL_OUT(ldouble, fL, " %24.14Le")
/*
 * Names printed by the "a" (named character) format, indexed by the
 * low seven bits of the byte.  Every entry is exactly three columns
 * wide, right-justified, so that all cells of an output line have the
 * same width.
 */
static const char *ascii[] = {
	"nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
	" bs", " ht", " lf", " vt", " ff", " cr", " so", " si",
	"dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
	"can", " em", "sub", "esc", " fs", " gs", " rs", " us",
	" sp", "  !", "  \"", "  #", "  $", "  %", "  &", "  '",
	"  (", "  )", "  *", "  +", "  ,", "  -", "  .", "  /",
	"  0", "  1", "  2", "  3", "  4", "  5", "  6", "  7",
	"  8", "  9", "  :", "  ;", "  <", "  =", "  >", "  ?",
	"  @", "  A", "  B", "  C", "  D", "  E", "  F", "  G",
	"  H", "  I", "  J", "  K", "  L", "  M", "  N", "  O",
	"  P", "  Q", "  R", "  S", "  T", "  U", "  V", "  W",
	"  X", "  Y", "  Z", "  [", "  \\", "  ]", "  ^", "  _",
	"  `", "  a", "  b", "  c", "  d", "  e", "  f", "  g",
	"  h", "  i", "  j", "  k", "  l", "  m", "  n", "  o",
	"  p", "  q", "  r", "  s", "  t", "  u", "  v", "  w",
	"  x", "  y", "  z", "  {", "  |", "  }", "  ~", "del"
};

192 static void
193 do_ascii(buffer_t *buf, int index)
195 uint8_t v = get_u8(buf, index);
197 (void) fputc(' ', stdout);
198 (void) fputs(ascii[v & 0x7f], stdout);
201 static output_t output_ascii = {
202 1, do_ascii,
205 static void
206 do_char(buffer_t *buf, int index)
208 static int nresid = 0;
209 static int printable = 0;
210 int cnt;
211 int avail;
212 int nb;
213 char scratch[10];
214 wchar_t wc;
215 int which;
217 uint8_t v = get_u8(buf, index);
220 * If there were residual bytes from an earlier
221 * character, then just display the ** continuation
222 * indication.
224 if (nresid) {
225 if (printable) {
226 (void) fputs(" **", stdout);
227 } else {
228 (void) printf(" %03o", v);
230 nresid--;
231 return;
235 * Peek ahead up to MB_CUR_MAX characters. This has to be
236 * done carefully because we might need to look into the next
237 * block to really know for sure.
239 scratch[0] = v;
240 avail = buf->navail;
241 if (avail > MB_CUR_MAX)
242 avail = MB_CUR_MAX;
243 for (cnt = 1, which = index + 1; cnt < avail; cnt++, which++) {
244 scratch[cnt] = buf->data[which & buf->mask];
247 /* now see if the value is a real character */
248 nresid = 0;
249 wc = 0;
250 nb = mbtowc(&wc, scratch, avail);
251 if (nb < 0) {
252 (void) printf(" %03o", v);
253 return;
255 if (nb == 0) {
256 (void) fputs(" \\0", stdout);
257 return;
259 nresid = nb - 1;
260 if (nb && iswprint(wc)) {
261 scratch[nb] = 0;
262 (void) fputs(" ", stdout);
263 (void) fputs(scratch, stdout);
264 printable = 1;
265 return;
267 printable = 0;
268 if (wc == 0) {
269 (void) fputs(" \\0", stdout);
270 } else if (wc == '\b') {
271 (void) fputs(" \\b", stdout);
272 } else if (wc == '\f') {
273 (void) fputs(" \\f", stdout);
274 } else if (wc == '\n') {
275 (void) fputs(" \\n", stdout);
276 } else if (wc == '\r') {
277 (void) fputs(" \\r", stdout);
278 } else if (wc == '\t') {
279 (void) fputs(" \\t", stdout);
280 } else {
281 (void) printf(" %03o", v);
285 static output_t output_char = {
286 1, do_char,
290 * List of output formatting structures.
292 static output_t *head = NULL;
293 static output_t **tailp = &head;
295 static void
296 add_out(output_t *src)
298 output_t *out;
299 int m;
301 if ((out = calloc(1, sizeof (*src))) == NULL) {
302 err(1, "malloc");
305 m = lcm;
306 while ((m % src->width) != 0) {
307 m += lcm;
309 lcm = m;
310 blocksize = lcm;
311 while (blocksize < 16)
312 blocksize *= 2;
314 (void) memcpy(out, src, sizeof (*src));
315 *tailp = out;
316 tailp = &out->next;
319 static FILE *
320 next_input(void)
322 for (;;) {
323 if (curfile >= numfiles)
324 return (NULL);
326 if (input != NULL) {
327 if ((input = freopen(files[curfile], "r", input)) !=
328 NULL) {
329 curfile++;
330 return (input);
332 } else {
333 if ((input = fopen(files[curfile], "r")) != NULL) {
334 curfile++;
335 return (input);
338 warn("open: %s", files[curfile]);
339 curfile++;
343 static void
344 refill(buffer_t *b)
346 int n;
347 int want;
348 int zero;
351 * If we have 2 blocks of bytes available, we're done. Note
352 * that each iteration usually loads up 16 bytes, unless we
353 * run out of data.
355 while ((input != NULL) && (b->navail < (2 * blocksize))) {
357 /* we preload the next one in advance */
359 if (limit == 0) {
360 (void) fclose(input);
361 input = NULL;
362 continue;
365 /* we want to read a whole block if possible */
366 want = blocksize;
367 if ((limit >= 0) && (want > limit)) {
368 want = limit;
370 zero = blocksize;
372 while (want && input) {
373 int c;
374 b->prod &= b->mask;
375 c = (b->prod + want > (b->mask + 1)) ?
376 b->mask - b->prod :
377 want;
379 n = fread(b->data + b->prod, 1, c, input);
380 if (n < 0) {
381 warn("read: %s",
382 files ? files[curfile-1] : "stdin");
383 input = next_input();
384 continue;
386 if (n == 0) {
387 input = next_input();
388 continue;
390 if (limit >= 0)
391 limit -= n;
392 b->navail += n;
393 b->prod += n;
394 want -= n;
395 zero -= n;
398 while (zero) {
399 b->data[b->prod & b->mask] = 0;
400 b->prod++;
401 b->prod &= b->mask;
402 zero--;
407 #define STR1 "C1"
408 #define STR2 "S2"
409 #ifdef _LP64
410 #define STR8 "L8"
411 #define STR4 "I4"
412 #else
413 #define STR8 "8"
414 #define STR4 "IL4"
415 #endif
417 static void
418 do_type_string(char *typestr)
420 if (*typestr == 0) {
421 errx(1, _("missing type string"));
423 while (*typestr) {
424 switch (*typestr) {
425 case 'a':
426 typestr++;
427 add_out(&output_ascii);
428 break;
429 case 'c':
430 add_out(&output_char);
431 typestr++;
432 break;
433 case 'f':
434 typestr++;
435 switch (*typestr) {
436 case 'F':
437 case '4':
438 add_out(&output_float);
439 typestr++;
440 break;
441 case '8':
442 case 'D':
443 add_out(&output_double);
444 typestr++;
445 break;
446 case 'L':
447 add_out(&output_ldouble);
448 typestr++;
449 break;
450 default:
451 add_out(&output_float);
452 break;
454 break;
457 case 'd':
458 typestr++;
459 if (strchr(STR1, *typestr)) {
460 typestr++;
461 add_out(&output_sig_b);
462 } else if (strchr(STR2, *typestr)) {
463 typestr++;
464 add_out(&output_sig_w);
465 } else if (strchr(STR4, *typestr)) {
466 typestr++;
467 add_out(&output_sig_d);
468 } else if (strchr(STR8, *typestr)) {
469 typestr++;
470 add_out(&output_sig_q);
471 } else {
472 add_out(&output_sig_d);
474 break;
476 case 'u':
477 typestr++;
478 if (strchr(STR1, *typestr)) {
479 typestr++;
480 add_out(&output_dec_b);
481 } else if (strchr(STR2, *typestr)) {
482 typestr++;
483 add_out(&output_dec_w);
484 } else if (strchr(STR4, *typestr)) {
485 typestr++;
486 add_out(&output_dec_d);
487 } else if (strchr(STR8, *typestr)) {
488 typestr++;
489 add_out(&output_dec_q);
490 } else {
491 add_out(&output_dec_d);
493 break;
495 case 'o':
496 typestr++;
497 if (strchr(STR1, *typestr)) {
498 typestr++;
499 add_out(&output_oct_b);
500 } else if (strchr(STR2, *typestr)) {
501 typestr++;
502 add_out(&output_oct_w);
503 } else if (strchr(STR4, *typestr)) {
504 typestr++;
505 add_out(&output_oct_d);
506 } else if (strchr(STR8, *typestr)) {
507 typestr++;
508 add_out(&output_oct_q);
509 } else {
510 add_out(&output_oct_d);
512 break;
514 case 'x':
515 typestr++;
516 if (strchr(STR1, *typestr)) {
517 typestr++;
518 add_out(&output_hex_b);
519 } else if (strchr(STR2, *typestr)) {
520 typestr++;
521 add_out(&output_hex_w);
522 } else if (strchr(STR4, *typestr)) {
523 typestr++;
524 add_out(&output_hex_d);
525 } else if (strchr(STR8, *typestr)) {
526 typestr++;
527 add_out(&output_hex_q);
528 } else {
529 add_out(&output_hex_d);
531 break;
533 default:
534 errx(1, _("unrecognized type string character: %c"),
535 *typestr);
536 exit(1);
542 main(int argc, char **argv)
544 int c;
545 int i;
546 buffer_t buffer;
547 boolean_t first = B_TRUE;
548 boolean_t doall = B_FALSE;
549 boolean_t same = B_FALSE;
550 boolean_t newarg = B_FALSE;
551 off_t offset = 0;
552 off_t skip = 0;
553 char *eptr;
554 char *offstr = 0;
556 input = stdin;
558 (void) setlocale(LC_ALL, "");
559 (void) textdomain(TEXT_DOMAIN);
561 while ((c = getopt(argc, argv, "A:bCcdDfFj:N:oOsSxXvt:")) != EOF) {
562 switch (c) {
563 case 'A':
564 newarg = B_TRUE;
565 if (strlen(optarg) > 1) {
566 afmt = NULL;
568 switch (*optarg) {
569 case 'o':
570 afmt = "%07llo";
571 cfmt = " ";
572 break;
573 case 'd':
574 afmt = "%07lld";
575 cfmt = " ";
576 break;
577 case 'x':
578 afmt = "%07llx";
579 cfmt = " ";
580 break;
581 case 'n':
583 * You could argue that the code should
584 * use the same 7 spaces. Legacy uses 8
585 * though. Oh well. Better to avoid
586 * gratuitous change.
588 afmt = " ";
589 cfmt = " ";
590 break;
591 default:
592 afmt = NULL;
593 break;
595 if (strlen(optarg) != 1) {
596 afmt = NULL;
598 if (afmt == NULL)
599 warnx(_("invalid address base, "
600 "must be o, d, x, or n"));
601 break;
603 case 'b':
604 add_out(&output_oct_b);
605 break;
607 case 'c':
608 case 'C':
609 add_out(&output_char);
610 break;
612 case 'f':
613 add_out(&output_float);
614 break;
616 case 'F':
617 add_out(&output_double);
618 break;
620 case 'd':
621 add_out(&output_dec_w);
622 break;
624 case 'D':
625 add_out(&output_dec_d);
626 break;
628 case 't':
629 newarg = B_TRUE;
630 do_type_string(optarg);
631 break;
633 case 'o':
634 add_out(&output_oct_w);
635 break;
637 case 'O':
638 add_out(&output_oct_d);
639 break;
641 case 's':
642 add_out(&output_sig_w);
643 break;
645 case 'S':
646 add_out(&output_sig_d);
647 break;
649 case 'x':
650 add_out(&output_hex_w);
651 break;
653 case 'X':
654 add_out(&output_hex_d);
655 break;
657 case 'v':
658 doall = B_TRUE;
659 break;
661 case 'j':
662 newarg = B_TRUE;
663 skip = strtoll(optarg, &eptr, 0);
664 if (*eptr == 'b') {
665 skip <<= 9; /* 512 bytes */
666 eptr++;
667 } else if (*eptr == 'k') {
668 skip <<= 10; /* 1k */
669 eptr++;
670 } else if (*eptr == 'm') {
671 skip <<= 20; /* 1m */
672 eptr++;
673 } else if (*eptr == 'g') {
674 skip <<= 30; /* 1g */
675 eptr++;
677 if ((skip < 0) || (eptr[0] != 0)) {
678 warnx(_("invalid skip count '%s' specified"),
679 optarg);
680 exit(1);
682 break;
684 case 'N':
685 newarg = B_TRUE;
686 limit = strtoll(optarg, &eptr, 0);
688 * POSIX doesn't specify this, but I think these
689 * may be helpful.
691 if (*eptr == 'b') {
692 limit <<= 9;
693 eptr++;
694 } else if (*eptr == 'k') {
695 limit <<= 10;
696 eptr++;
697 } else if (*eptr == 'm') {
698 limit <<= 20;
699 eptr++;
700 } else if (*eptr == 'g') {
701 limit <<= 30;
702 eptr++;
704 if ((limit < 0) || (eptr[0] != 0)) {
705 warnx(_("invalid byte count '%s' specified"),
706 optarg);
707 exit(1);
709 break;
711 default:
712 usage();
713 break;
717 /* this finds the smallest power of two size we can use */
718 buffer.mask = (1 << (ffs(blocksize * 3) + 1)) - 1;
719 buffer.data = memalign(16, buffer.mask + 1);
720 if (buffer.data == NULL) {
721 err(1, "memalign");
726 * Wow. This option parsing is hideous.
728 * If the we've not seen a new option, and there is just one
729 * operand, if it starts with a "+", then treat it as an
730 * offset. Otherwise if two operands, and the second operand
731 * starts with + or a digit, then it is an offset.
733 if (!newarg) {
734 if (((argc - optind) == 1) && (argv[optind][0] == '+')) {
735 offstr = argv[optind];
736 argc--;
737 } else if (((argc - optind) == 2) &&
738 (strchr("+0123456789", (argv[optind + 1][0])) != NULL)) {
739 offstr = argv[optind + 1];
740 argc--;
743 if (offstr) {
744 int base = 0;
745 int mult = 1;
746 int l;
747 if (*offstr == '+') {
748 offstr++;
750 l = strlen(offstr);
751 if ((strncmp(offstr, "0x", 2) == 0)) {
752 afmt = "%07llx";
753 base = 16;
754 offstr += 2;
755 if (offstr[l - 1] == 'B') {
756 offstr[l - 1] = 0;
757 l--;
758 mult = 512;
760 } else {
761 base = 8;
762 afmt = "%07llo";
763 if ((offstr[l - 1] == 'B') || (offstr[l - 1] == 'b')) {
764 offstr[l - 1] = 0;
765 l--;
766 mult = 512;
768 if (offstr[l - 1] == '.') {
769 offstr[l - 1] = 0;
770 base = 10;
771 afmt = "%07lld";
774 skip = strtoll(offstr, &eptr, base);
775 if (*eptr != '\0') {
776 errx(1, _("invalid offset string specified"));
778 skip *= mult;
779 offset += skip;
783 * Allocate an array for all the input files.
785 if (argc > optind) {
786 files = calloc(sizeof (char *), argc - optind);
787 for (i = 0; i < argc - optind; i++) {
788 files[i] = argv[optind + i];
789 numfiles++;
791 input = next_input();
792 } else {
793 input = stdin;
797 * We need to seek ahead. fseek would be faster.
799 while (skip && (input != NULL)) {
800 struct stat sbuf;
803 * Only fseek() on regular files. (Others
804 * we have to read().
806 if (fstat(fileno(input), &sbuf) < 0) {
807 warn("fstat: %s", files[curfile-1]);
808 input = next_input();
809 continue;
811 if (S_ISREG(sbuf.st_mode)) {
813 * No point in seeking a file that is too
814 * short to begin with.
816 if (sbuf.st_size < skip) {
817 skip -= sbuf.st_size;
818 input = next_input();
819 continue;
821 if (fseeko(input, skip, SEEK_SET) < 0) {
822 err(1, "fseek:%s", files[curfile-1]);
824 /* Done seeking. */
825 skip = 0;
826 break;
830 * fgetc seems like it would be slow, but it uses
831 * buffered I/O, so it should be fast enough.
833 flockfile(input);
834 while (skip) {
835 if (getc_unlocked(input) == EOF) {
836 funlockfile(input);
837 if (ferror(input)) {
838 warn("read: %s", files[curfile-1]);
840 input = next_input();
841 if (input != NULL) {
842 flockfile(input);
844 break;
846 skip--;
848 if (input != NULL)
849 funlockfile(input);
852 if (head == NULL) {
853 add_out(&output_oct_w);
856 buffer.navail = 0;
857 buffer.prod = 0;
858 buffer.cons = 0;
860 for (refill(&buffer); buffer.navail > 0; refill(&buffer)) {
861 output_t *out;
862 int mx;
863 int j, k;
866 * If this buffer was the same as last, then just
867 * dump an asterisk.
869 if ((!first) && (buffer.navail >= blocksize) && (!doall)) {
870 j = buffer.cons;
871 k = j - blocksize;
872 for (i = 0; i < blocksize; i++) {
873 if (buffer.data[j & buffer.mask] !=
874 buffer.data[k & buffer.mask]) {
875 break;
877 j++;
878 k++;
880 if (i == blocksize) {
881 if (!same) {
882 (void) fputs("*\n", stdout);
883 same = B_TRUE;
885 buffer.navail -= blocksize;
886 offset += blocksize;
887 buffer.cons += blocksize;
888 buffer.cons &= buffer.mask;
889 continue;
893 first = B_FALSE;
894 same = B_FALSE;
895 mx = (buffer.navail > blocksize) ? blocksize : buffer.navail;
897 for (out = head; out != NULL; out = out->next) {
899 if (out == head) {
900 /*LINTED E_SEC_PRINTF_VAR_FMT*/
901 (void) printf(afmt, offset);
902 } else {
903 (void) fputs(cfmt, stdout);
905 for (i = 0, j = buffer.cons; i < mx; i += out->width) {
906 out->func(&buffer, j);
907 j += out->width;
908 j &= buffer.mask;
910 (void) fputs("\n", stdout);
912 buffer.cons += mx;
913 buffer.cons &= buffer.mask;
914 offset += mx;
915 buffer.navail -= mx;
917 /*LINTED E_SEC_PRINTF_VAR_FMT*/
918 (void) printf(afmt, offset);
919 (void) fputs("\n", stdout);
920 return (0);