2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet
9 * http://www.illumos.org/license/CDDL.
13 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
17 * od - octal dump. Not really just octal anymore; read the POSIX
18 * specification for it -- it's more complex than you think!
20 * NB: We followed the POSIX semantics fairly strictly, where the
21 * legacy code's behavior was in conflict. In many cases the legacy
22 * Solaris code was so completely broken as to be completely unusable.
23 * (For example, the long double support was broken beyond
24 * imagination!) Note that GNU coreutils violates POSIX in a few
25 * interesting ways, such as changing the numbering of the addresses
26 * when skipping. (Address starts should always be at 0, according to
27 * the sample output in the Open Group man page.)
32 #include <sys/types.h>
40 #define _(x) gettext(x)
44 #define TEXT_DOMAIN "SYS_TEST"
/*
 * File-scope state shared by option parsing, input handling and the
 * dump loop.
 */

/* printf format for the address printed with each line; 7-digit octal here */
48 static char *afmt
= "%07llo";
/*
 * Written with fputs() in the per-format output loop.
 * NOTE(review): presumably the filler used in place of the address on
 * additional format lines -- confirm against the loop in main().
 */
49 static char *cfmt
= " ";
/* stream currently being read; assigned from next_input() */
51 static FILE *input
= NULL
;
/*
 * NOTE(review): presumably the running least common multiple of the
 * selected output widths -- confirm in add_out().
 */
52 static size_t lcm
= 1;
/* bytes handled per block (one block is one line of od output) */
53 static size_t blocksize
= 16;
/* number of entries in files[] */
54 static int numfiles
= 0;
/*
 * Index into files[] compared against numfiles before opening an input;
 * diagnostics name the current file as files[curfile-1].
 */
55 static int curfile
= 0;
/* input file names, copied from the operands in argv; NULL until allocated */
56 static char **files
= NULL
;
/* byte limit taken from the byte-count option; only applied when >= 0 */
57 static off_t limit
= -1;
60 * This structure describes our ring buffer. It's always a power of 2
61 * in size to make wrap around calculations fast using a mask instead
64 * The size is calculated thusly: We need three "blocks" of data, as
65 * we process a block at a time (one block == one line of od output.)
67 * We need lookahead of an extra block to support multibyte chars. We
68 * also have a look behind so that we can avoid printing lines that
69 * are identical to what we've already printed. Finally, we need the
72 * The block size is determined by the least common multiple of the
73 * data items being displayed. Usually it will be 16, but sometimes
74 * it is 24 (when 12-byte long doubles are presented.)
76 * The data buffer is allocated via memalign to make sure it is
79 typedef struct buffer
{
80 char *data
; /* data buffer */
81 int prod
; /* producer index */
82 int cons
; /* consumer index */
83 int mask
; /* buffer size - 1, wraparound index */
84 int navail
; /* total bytes avail */
88 * This structure is used to provide information on a specific output
89 * format. We link them together in a list representing the output
90 * formats that the user has selected.
92 typedef struct output
{
93 int width
; /* bytes consumed per call */
94 void (*func
)(buffer_t
*, int); /* output function */
95 struct output
*next
; /* link node */
/*
 * Short aliases for the element types named in the DECL_OUT
 * instantiations.  NOTE(review): the digits in the integer names read as
 * bit widths, which holds only where short/int/long long are 16/32/64
 * bits -- confirm for the target ABI.
 */
102 typedef unsigned char u8
;
103 typedef unsigned short u16
;
104 typedef unsigned int u32
;
105 typedef unsigned long long u64
;
109 typedef long long s64
;
/* long double element, used by the ldouble output format */
112 typedef long double fL
;
117 (void) fprintf(stderr
, _("usage: od [-bcCdDfFoOsSvxX] "
118 "[-t types ]... [-A base] [-j skip] [-N count] [file]...\n"));
122 #define DECL_GET(typ) \
124 get_ ## typ(buffer_t *b, int index) \
126 typ val = *(typ *)(void *)(b->data + index); \
141 #define DECL_OUT(nm, typ, fmt) \
143 do_ ## nm(buffer_t *buf, int index) \
145 typ v = get_ ## typ(buf, index); \
146 (void) printf(fmt, v); \
149 static output_t output_ ## nm = { \
150 sizeof (typ), do_ ## nm \
153 DECL_OUT(oct_b
, u8
, " %03o")
154 DECL_OUT(oct_w
, u16
, " %06ho")
155 DECL_OUT(oct_d
, u32
, " %011o")
156 DECL_OUT(oct_q
, u64
, " %022llo")
157 DECL_OUT(dec_b
, u8
, " %03u")
158 DECL_OUT(dec_w
, u16
, " %05hu")
159 DECL_OUT(dec_d
, u32
, " %010u")
160 DECL_OUT(dec_q
, u64
, " %020llu")
161 DECL_OUT(sig_b
, s8
, " %03d")
162 DECL_OUT(sig_w
, s16
, " %6.05hd")
163 DECL_OUT(sig_d
, s32
, " %11.010d")
164 DECL_OUT(sig_q
, s64
, " %20.019lld")
165 DECL_OUT(hex_b
, u8
, " %02x")
166 DECL_OUT(hex_w
, u16
, " %04hx")
167 DECL_OUT(hex_d
, s32
, " %08x")
168 DECL_OUT(hex_q
, s64
, " %016llx")
169 DECL_OUT(float, fF
, " %14.7e")
170 DECL_OUT(double, fD
, " %21.14e")
171 DECL_OUT(ldouble
, fL
, " %24.14Le")
/*
 * Names printed for 7-bit values by the named-character output; indexed
 * by (byte & 0x7f), so there are exactly 128 entries.
 *
 * Every entry is three columns wide so that the fields of a line stay
 * aligned: control characters, space and DEL use their POSIX od names
 * (value 7 is "bel"), and each printable character is right-justified
 * with two leading blanks.
 */
static char *ascii[] = {
	"nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
	" bs", " ht", " lf", " vt", " ff", " cr", " so", " si",
	"dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
	"can", " em", "sub", "esc", " fs", " gs", " rs", " us",
	" sp", "  !", "  \"", "  #", "  $", "  %", "  &", "  '",
	"  (", "  )", "  *", "  +", "  ,", "  -", "  .", "  /",
	"  0", "  1", "  2", "  3", "  4", "  5", "  6", "  7",
	"  8", "  9", "  :", "  ;", "  <", "  =", "  >", "  ?",
	"  @", "  A", "  B", "  C", "  D", "  E", "  F", "  G",
	"  H", "  I", "  J", "  K", "  L", "  M", "  N", "  O",
	"  P", "  Q", "  R", "  S", "  T", "  U", "  V", "  W",
	"  X", "  Y", "  Z", "  [", "  \\", "  ]", "  ^", "  _",
	"  `", "  a", "  b", "  c", "  d", "  e", "  f", "  g",
	"  h", "  i", "  j", "  k", "  l", "  m", "  n", "  o",
	"  p", "  q", "  r", "  s", "  t", "  u", "  v", "  w",
	"  x", "  y", "  z", "  {", "  |", "  }", "  ~", "del"
};
193 do_ascii(buffer_t
*buf
, int index
)
195 uint8_t v
= get_u8(buf
, index
);
197 (void) fputc(' ', stdout
);
198 (void) fputs(ascii
[v
& 0x7f], stdout
);
201 static output_t output_ascii
= {
206 do_char(buffer_t
*buf
, int index
)
208 static int nresid
= 0;
209 static int printable
= 0;
217 uint8_t v
= get_u8(buf
, index
);
220 * If there were residual bytes from an earlier
221 * character, then just display the ** continuation
226 (void) fputs(" **", stdout
);
228 (void) printf(" %03o", v
);
235 * Peek ahead up to MB_CUR_MAX characters. This has to be
236 * done carefully because we might need to look into the next
237 * block to really know for sure.
241 if (avail
> MB_CUR_MAX
)
243 for (cnt
= 1, which
= index
+ 1; cnt
< avail
; cnt
++, which
++) {
244 scratch
[cnt
] = buf
->data
[which
& buf
->mask
];
247 /* now see if the value is a real character */
250 nb
= mbtowc(&wc
, scratch
, avail
);
252 (void) printf(" %03o", v
);
256 (void) fputs(" \\0", stdout
);
260 if (nb
&& iswprint(wc
)) {
262 (void) fputs(" ", stdout
);
263 (void) fputs(scratch
, stdout
);
269 (void) fputs(" \\0", stdout
);
270 } else if (wc
== '\b') {
271 (void) fputs(" \\b", stdout
);
272 } else if (wc
== '\f') {
273 (void) fputs(" \\f", stdout
);
274 } else if (wc
== '\n') {
275 (void) fputs(" \\n", stdout
);
276 } else if (wc
== '\r') {
277 (void) fputs(" \\r", stdout
);
278 } else if (wc
== '\t') {
279 (void) fputs(" \\t", stdout
);
281 (void) printf(" %03o", v
);
285 static output_t output_char
= {
290 * List of output formatting structures.
/* first node of the list of selected output formats; NULL while empty */
292 static output_t
*head
= NULL
;
/*
 * Starts out holding the address of head.
 * NOTE(review): presumably the link that add_out() fills in when it
 * appends a node -- confirm against add_out().
 */
293 static output_t
**tailp
= &head
;
296 add_out(output_t
*src
)
301 if ((out
= calloc(1, sizeof (*src
))) == NULL
) {
306 while ((m
% src
->width
) != 0) {
311 while (blocksize
< 16)
314 (void) memcpy(out
, src
, sizeof (*src
));
323 if (curfile
>= numfiles
)
327 if ((input
= freopen(files
[curfile
], "r", input
)) !=
333 if ((input
= fopen(files
[curfile
], "r")) != NULL
) {
338 warn("open: %s", files
[curfile
]);
351 * If we have 2 blocks of bytes available, we're done. Note
352 * that each iteration usually loads up 16 bytes, unless we
355 while ((input
!= NULL
) && (b
->navail
< (2 * blocksize
))) {
357 /* we preload the next one in advance */
360 (void) fclose(input
);
365 /* we want to read a whole block if possible */
367 if ((limit
>= 0) && (want
> limit
)) {
372 while (want
&& input
) {
375 c
= (b
->prod
+ want
> (b
->mask
+ 1)) ?
379 n
= fread(b
->data
+ b
->prod
, 1, c
, input
);
382 files
? files
[curfile
-1] : "stdin");
383 input
= next_input();
387 input
= next_input();
399 b
->data
[b
->prod
& b
->mask
] = 0;
418 do_type_string(char *typestr
)
421 errx(1, _("missing type string"));
427 add_out(&output_ascii
);
430 add_out(&output_char
);
438 add_out(&output_float
);
443 add_out(&output_double
);
447 add_out(&output_ldouble
);
451 add_out(&output_float
);
459 if (strchr(STR1
, *typestr
)) {
461 add_out(&output_sig_b
);
462 } else if (strchr(STR2
, *typestr
)) {
464 add_out(&output_sig_w
);
465 } else if (strchr(STR4
, *typestr
)) {
467 add_out(&output_sig_d
);
468 } else if (strchr(STR8
, *typestr
)) {
470 add_out(&output_sig_q
);
472 add_out(&output_sig_d
);
478 if (strchr(STR1
, *typestr
)) {
480 add_out(&output_dec_b
);
481 } else if (strchr(STR2
, *typestr
)) {
483 add_out(&output_dec_w
);
484 } else if (strchr(STR4
, *typestr
)) {
486 add_out(&output_dec_d
);
487 } else if (strchr(STR8
, *typestr
)) {
489 add_out(&output_dec_q
);
491 add_out(&output_dec_d
);
497 if (strchr(STR1
, *typestr
)) {
499 add_out(&output_oct_b
);
500 } else if (strchr(STR2
, *typestr
)) {
502 add_out(&output_oct_w
);
503 } else if (strchr(STR4
, *typestr
)) {
505 add_out(&output_oct_d
);
506 } else if (strchr(STR8
, *typestr
)) {
508 add_out(&output_oct_q
);
510 add_out(&output_oct_d
);
516 if (strchr(STR1
, *typestr
)) {
518 add_out(&output_hex_b
);
519 } else if (strchr(STR2
, *typestr
)) {
521 add_out(&output_hex_w
);
522 } else if (strchr(STR4
, *typestr
)) {
524 add_out(&output_hex_d
);
525 } else if (strchr(STR8
, *typestr
)) {
527 add_out(&output_hex_q
);
529 add_out(&output_hex_d
);
534 errx(1, _("unrecognized type string character: %c"),
542 main(int argc
, char **argv
)
547 boolean_t first
= B_TRUE
;
548 boolean_t doall
= B_FALSE
;
549 boolean_t same
= B_FALSE
;
550 boolean_t newarg
= B_FALSE
;
558 (void) setlocale(LC_ALL
, "");
559 (void) textdomain(TEXT_DOMAIN
);
561 while ((c
= getopt(argc
, argv
, "A:bCcdDfFj:N:oOsSxXvt:")) != EOF
) {
565 if (strlen(optarg
) > 1) {
583 * You could argue that the code should
584 * use the same 7 spaces. Legacy uses 8
585 * though. Oh well. Better to avoid
595 if (strlen(optarg
) != 1) {
599 warnx(_("invalid address base, "
600 "must be o, d, x, or n"));
604 add_out(&output_oct_b
);
609 add_out(&output_char
);
613 add_out(&output_float
);
617 add_out(&output_double
);
621 add_out(&output_dec_w
);
625 add_out(&output_dec_d
);
630 do_type_string(optarg
);
634 add_out(&output_oct_w
);
638 add_out(&output_oct_d
);
642 add_out(&output_sig_w
);
646 add_out(&output_sig_d
);
650 add_out(&output_hex_w
);
654 add_out(&output_hex_d
);
663 skip
= strtoll(optarg
, &eptr
, 0);
665 skip
<<= 9; /* 512 bytes */
667 } else if (*eptr
== 'k') {
668 skip
<<= 10; /* 1k */
670 } else if (*eptr
== 'm') {
671 skip
<<= 20; /* 1m */
673 } else if (*eptr
== 'g') {
674 skip
<<= 30; /* 1g */
677 if ((skip
< 0) || (eptr
[0] != 0)) {
678 warnx(_("invalid skip count '%s' specified"),
686 limit
= strtoll(optarg
, &eptr
, 0);
688 * POSIX doesn't specify this, but I think these
694 } else if (*eptr
== 'k') {
697 } else if (*eptr
== 'm') {
700 } else if (*eptr
== 'g') {
704 if ((limit
< 0) || (eptr
[0] != 0)) {
705 warnx(_("invalid byte count '%s' specified"),
717 /* this finds the smallest power of two size we can use */
718 buffer
.mask
= (1 << (ffs(blocksize
* 3) + 1)) - 1;
719 buffer
.data
= memalign(16, buffer
.mask
+ 1);
720 if (buffer
.data
== NULL
) {
726 * Wow. This option parsing is hideous.
728 * If we've not seen a new option, and there is just one
729 * operand, if it starts with a "+", then treat it as an
730 * offset. Otherwise if two operands, and the second operand
731 * starts with + or a digit, then it is an offset.
734 if (((argc
- optind
) == 1) && (argv
[optind
][0] == '+')) {
735 offstr
= argv
[optind
];
737 } else if (((argc
- optind
) == 2) &&
738 (strchr("+0123456789", (argv
[optind
+ 1][0])) != NULL
)) {
739 offstr
= argv
[optind
+ 1];
747 if (*offstr
== '+') {
751 if ((strncmp(offstr
, "0x", 2) == 0)) {
755 if (offstr
[l
- 1] == 'B') {
763 if ((offstr
[l
- 1] == 'B') || (offstr
[l
- 1] == 'b')) {
768 if (offstr
[l
- 1] == '.') {
774 skip
= strtoll(offstr
, &eptr
, base
);
776 errx(1, _("invalid offset string specified"));
783 * Allocate an array for all the input files.
786 files
= calloc(sizeof (char *), argc
- optind
);
787 for (i
= 0; i
< argc
- optind
; i
++) {
788 files
[i
] = argv
[optind
+ i
];
791 input
= next_input();
797 * We need to seek ahead. fseek would be faster.
799 while (skip
&& (input
!= NULL
)) {
803 * Only fseek() on regular files. (Others
806 if (fstat(fileno(input
), &sbuf
) < 0) {
807 warn("fstat: %s", files
[curfile
-1]);
808 input
= next_input();
811 if (S_ISREG(sbuf
.st_mode
)) {
813 * No point in seeking a file that is too
814 * short to begin with.
816 if (sbuf
.st_size
< skip
) {
817 skip
-= sbuf
.st_size
;
818 input
= next_input();
821 if (fseeko(input
, skip
, SEEK_SET
) < 0) {
822 err(1, "fseek:%s", files
[curfile
-1]);
830 * fgetc seems like it would be slow, but it uses
831 * buffered I/O, so it should be fast enough.
835 if (getc_unlocked(input
) == EOF
) {
838 warn("read: %s", files
[curfile
-1]);
840 input
= next_input();
853 add_out(&output_oct_w
);
860 for (refill(&buffer
); buffer
.navail
> 0; refill(&buffer
)) {
866 * If this buffer was the same as last, then just
869 if ((!first
) && (buffer
.navail
>= blocksize
) && (!doall
)) {
872 for (i
= 0; i
< blocksize
; i
++) {
873 if (buffer
.data
[j
& buffer
.mask
] !=
874 buffer
.data
[k
& buffer
.mask
]) {
880 if (i
== blocksize
) {
882 (void) fputs("*\n", stdout
);
885 buffer
.navail
-= blocksize
;
887 buffer
.cons
+= blocksize
;
888 buffer
.cons
&= buffer
.mask
;
895 mx
= (buffer
.navail
> blocksize
) ? blocksize
: buffer
.navail
;
897 for (out
= head
; out
!= NULL
; out
= out
->next
) {
900 /*LINTED E_SEC_PRINTF_VAR_FMT*/
901 (void) printf(afmt
, offset
);
903 (void) fputs(cfmt
, stdout
);
905 for (i
= 0, j
= buffer
.cons
; i
< mx
; i
+= out
->width
) {
906 out
->func(&buffer
, j
);
910 (void) fputs("\n", stdout
);
913 buffer
.cons
&= buffer
.mask
;
917 /*LINTED E_SEC_PRINTF_VAR_FMT*/
918 (void) printf(afmt
, offset
);
919 (void) fputs("\n", stdout
);