cut: shorten error messages on bad syntax even more
[busybox-git.git] / coreutils / od_bloaty.c
blobe886a4ed28a46b19f14b098e0a351a46fa8eb349
1 /* od -- dump files in octal and other formats
2 Copyright (C) 92, 1995-2004 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 /* Written by Jim Meyering. */
19 /* Busyboxed by Denys Vlasenko, based on od.c from coreutils-5.2.1 */
22 /* #include "libbb.h" - done in od.c */
23 #include "common_bufsiz.h"
24 #define assert(a) ((void)0)
27 //usage:#if ENABLE_DESKTOP
28 //usage:#define od_trivial_usage
29 //usage: "[-abcdfhilovxs] [-t TYPE] [-A RADIX] [-N SIZE] [-j SKIP] [-S MINSTR] [-w WIDTH] [FILE]..."
30 // We also support -BDOHXIL, but they are not documented in coreutils 9.1
31 // manpage/help, so don't show them either.
32 // We don't support:
33 // ... [FILE] [[+]OFFSET[.][b]]
34 // Support is buggy for:
35 // od --traditional [OPTION]... [FILE] [[+]OFFSET[.][b] [+][LABEL][.][b]]
37 //usage:#define od_full_usage "\n\n"
38 //usage: "Print FILEs (or stdin) unambiguously, as octal bytes by default"
39 //usage:#endif
41 enum {
42 OPT_A = 1 << 0,
43 OPT_N = 1 << 1,
44 OPT_a = 1 << 2,
45 OPT_b = 1 << 3,
46 OPT_c = 1 << 4,
47 OPT_d = 1 << 5,
48 OPT_D = 1 << 6, /* undocumented in coreutils 9.1 */
49 OPT_f = 1 << 7,
50 OPT_h = 1 << 8,
51 OPT_H = 1 << 9, /* undocumented in coreutils 9.1 */
52 OPT_i = 1 << 10,
53 OPT_I = 1 << 11, /* undocumented in coreutils 9.1 */
54 OPT_j = 1 << 12,
55 OPT_l = 1 << 13,
56 OPT_L = 1 << 14, /* undocumented in coreutils 9.1 */
57 OPT_o = 1 << 15,
58 OPT_O = 1 << 16, /* undocumented in coreutils 9.1 */
59 OPT_B = 1 << 17, /* undocumented synonym to -o */
60 OPT_t = 1 << 18,
61 /* When zero and two or more consecutive blocks are equal, format
62 only the first block and output an asterisk alone on the following
63 line to indicate that identical blocks have been elided: */
64 OPT_v = 1 << 19,
65 OPT_x = 1 << 20,
66 OPT_X = 1 << 21, /* undocumented in coreutils 9.1 */
67 OPT_s = 1 << 22,
68 OPT_S = 1 << 23,
69 OPT_w = 1 << 24,
70 OPT_traditional = (1 << 25) * ENABLE_LONG_OPTS,
73 #define OD_GETOPT32() getopt32long(argv, \
74 "A:N:abcdDfhHiIj:lLoOBt:*vxXsS:w:+:", od_longopts, \
75 /* -w with optional param */ \
76 /* -S was -s and also had optional parameter */ \
77 /* but in coreutils 6.3 it was renamed and now has */ \
78 /* _mandatory_ parameter */ \
79 &str_A, &str_N, &str_j, &lst_t, &str_S, &G.bytes_per_block)
/* True for printable 7-bit ASCII (space through '~').
 * The upper bound (0x7f excluded) is a coreutils 6.3 addition.
 * Note: evaluates its argument twice. */
#define ISPRINT(c) ((c) >= ' ' && (c) < 0x7f)

/* Short aliases for the widest arithmetic types handled here */
typedef long double longdouble_t;
typedef unsigned long long ulonglong_t;
typedef long long llong;

/* Offset parser matching the width chosen for file offsets */
#if ENABLE_LFS
# define xstrtooff_sfx xstrtoull_sfx
#else
# define xstrtooff_sfx xstrtoul_sfx
#endif

/* The default number of input bytes per output line */
#define DEFAULT_BYTES_PER_BLOCK 16

/* Fallbacks for the decimal precision of each floating type,
 * used only if <float.h> did not already provide them. */
#ifndef FLT_DIG
# define FLT_DIG 7	/* float */
#endif

#ifndef DBL_DIG
# define DBL_DIG 15	/* double */
#endif

#ifndef LDBL_DIG
# define LDBL_DIG DBL_DIG	/* long double */
#endif
/* Identifies the C type whose width an output format operates on.
 * The values index per-type lookup tables, so the order matters;
 * N_SIZE_SPECS is the number of real entries. */
enum size_spec {
	NO_SIZE           = 0,
	CHAR              = 1,
	SHORT             = 2,
	INT               = 3,
	LONG              = 4,
	LONG_LONG         = 5,
	FLOAT_SINGLE      = 6,
	FLOAT_DOUBLE      = 7,
	FLOAT_LONG_DOUBLE = 8,
	N_SIZE_SPECS
};
/* The kind of rendering requested for a datum */
enum output_format {
	SIGNED_DECIMAL   = 0,
	UNSIGNED_DECIMAL = 1,
	OCTAL            = 2,
	HEXADECIMAL      = 3,
	FLOATING_POINT   = 4,
	NAMED_CHARACTER  = 5,
	CHARACTER        = 6
};
136 /* Each output format specification (from '-t spec' or from
137 old-style options) is represented by one of these structures. */
138 struct tspec {
139 enum output_format fmt;
140 enum size_spec size;
141 void (*print_function) (size_t, const char *, const char *);
142 char *fmt_string;
143 int hexl_mode_trailer;
144 int field_width;
147 /* Convert the number of 8-bit bytes of a binary representation to
148 the number of characters (digits + sign if the type is signed)
149 required to represent the same quantity in the specified base/type.
150 For example, a 32-bit (4-byte) quantity may require a field width
151 as wide as the following for these types:
152 11 unsigned octal
153 11 signed decimal
154 10 unsigned decimal
155 8 unsigned hexadecimal */
157 static const uint8_t bytes_to_oct_digits[] ALIGN1 =
158 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
160 static const uint8_t bytes_to_signed_dec_digits[] ALIGN1 =
161 {1, 4, 6, 8, 11, 13, 16, 18, 20, 23, 25, 28, 30, 33, 35, 37, 40};
163 static const uint8_t bytes_to_unsigned_dec_digits[] ALIGN1 =
164 {0, 3, 5, 8, 10, 13, 15, 17, 20, 22, 25, 27, 29, 32, 34, 37, 39};
166 static const uint8_t bytes_to_hex_digits[] ALIGN1 =
167 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
169 /* Convert enum size_spec to the size of the named type. */
170 static const signed char width_bytes[] ALIGN1 = {
172 sizeof(char),
173 sizeof(short),
174 sizeof(int),
175 sizeof(long),
176 sizeof(ulonglong_t),
177 sizeof(float),
178 sizeof(double),
179 sizeof(longdouble_t)
181 /* Ensure that for each member of 'enum size_spec' there is an
182 initializer in the width_bytes array. */
183 struct ERR_width_bytes_has_bad_size {
184 char ERR_width_bytes_has_bad_size[ARRAY_SIZE(width_bytes) == N_SIZE_SPECS ? 1 : -1];
187 struct globals {
188 smallint exit_code;
190 unsigned string_min;
192 /* An array of specs describing how to format each input block. */
193 unsigned n_specs;
194 struct tspec *spec;
196 /* Function that accepts an address and an optional following char,
197 and prints the address and char to stdout. */
198 void (*format_address)(off_t, char);
200 /* The difference between the old-style pseudo starting address and
201 the number of bytes to skip. */
202 #if ENABLE_LONG_OPTS
203 off_t pseudo_offset;
204 # define G_pseudo_offset G.pseudo_offset
205 #endif
206 /* When zero, MAX_BYTES_TO_FORMAT and END_OFFSET are ignored, and all
207 input is formatted. */
209 /* The number of input bytes formatted per output line. It must be
210 a multiple of the least common multiple of the sizes associated with
211 the specified output types. It should be as large as possible, but
212 no larger than 16 -- unless specified with the -w option. */
213 unsigned bytes_per_block; /* have to use unsigned, not size_t */
215 /* A NULL-terminated list of the file-arguments from the command line. */
216 const char *const *file_list;
218 /* The input stream associated with the current file. */
219 FILE *in_stream;
221 bool not_first;
222 bool prev_pair_equal;
224 char address_fmt[sizeof("%0n"OFF_FMT"xc")];
225 } FIX_ALIASING;
/* The 'x' placeholder of G.address_fmt (third byte from the end,
 * counting the terminating NUL) */
#define address_base_char G.address_fmt[sizeof(G.address_fmt)-3]
/* The 'n' placeholder of G.address_fmt */
#define address_pad_len_char G.address_fmt[2]

/* Without long options there is no pseudo_offset field: use constant 0 */
#if !ENABLE_LONG_OPTS
enum { G_pseudo_offset = 0 };
#endif
/* The globals live in the shared bb_common_bufsiz1 buffer */
#define G (*(struct globals*)bb_common_bufsiz1)
/* Prepare the globals: set up the common buffer, check at build time
 * that struct globals fits in it, and load the initial values of
 * bytes_per_block and address_fmt. */
#define INIT_G() do { \
	setup_common_bufsiz(); \
	BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
	G.bytes_per_block = 32; \
	strcpy(G.address_fmt, "%0n"OFF_FMT"xc"); \
} while (0)
243 #define MAX_INTEGRAL_TYPE_SIZE sizeof(ulonglong_t)
244 static const unsigned char integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1] ALIGN1 = {
245 [sizeof(char)] = CHAR,
246 #if USHRT_MAX != UCHAR_MAX
247 [sizeof(short)] = SHORT,
248 #endif
249 #if UINT_MAX != USHRT_MAX
250 [sizeof(int)] = INT,
251 #endif
252 #if ULONG_MAX != UINT_MAX
253 [sizeof(long)] = LONG,
254 #endif
255 #if ULLONG_MAX != ULONG_MAX
256 [sizeof(ulonglong_t)] = LONG_LONG,
257 #endif
260 #define MAX_FP_TYPE_SIZE sizeof(longdouble_t)
261 static const unsigned char fp_type_size[MAX_FP_TYPE_SIZE + 1] ALIGN1 = {
262 /* gcc seems to allow repeated indexes. Last one wins */
263 [sizeof(longdouble_t)] = FLOAT_LONG_DOUBLE,
264 [sizeof(double)] = FLOAT_DOUBLE,
265 [sizeof(float)] = FLOAT_SINGLE
/* Compute the greatest common divisor of U and V (Euclid's algorithm).
 * Returns U when V is 0, hence 0 when both are 0. */
static unsigned
gcd(unsigned u, unsigned v)
{
	unsigned t;
	while (v != 0) {
		t = u % v;
		u = v;
		v = t;
	}
	return u;
}

/* Compute the least common multiple of U and V.
 * Returns 0 when both arguments are 0. */
static unsigned
lcm(unsigned u, unsigned v)
{
	unsigned t = gcd(u, v);
	if (t == 0)
		return 0;
	/* Divide before multiplying: t divides u exactly, and this keeps
	 * the intermediate value from wrapping around when u * v does not
	 * fit in an unsigned even though the result does. */
	return u / t * v;
}
/* Print each of the n_bytes bytes of block as a signed char,
 * formatting every value (promoted to int) with fmt_string. */
static void
print_s_char(size_t n_bytes, const char *block, const char *fmt_string)
{
	const signed char *item = (const signed char *) block;
	const signed char *const end = item + n_bytes;

	for (; item != end; item++) {
		int value = *item;
		printf(fmt_string, value);
	}
}
/* Print each of the n_bytes bytes of block as an unsigned char,
 * formatting every value (as unsigned) with fmt_string. */
static void
print_char(size_t n_bytes, const char *block, const char *fmt_string)
{
	const unsigned char *item = (const unsigned char *) block;
	const unsigned char *const end = item + n_bytes;

	for (; item != end; item++) {
		unsigned value = *item;
		printf(fmt_string, value);
	}
}
/* Print block as an array of signed shorts (n_bytes / sizeof(short)
 * items; a trailing partial item is ignored), formatting every value
 * (promoted to int) with fmt_string. */
static void
print_s_short(size_t n_bytes, const char *block, const char *fmt_string)
{
	const signed short *item = (const signed short *) block;
	const signed short *const end = item + n_bytes / sizeof(*item);

	for (; item != end; item++) {
		int value = *item;
		printf(fmt_string, value);
	}
}
/* Print block as an array of unsigned shorts (n_bytes / sizeof(short)
 * items; a trailing partial item is ignored), formatting every value
 * (as unsigned) with fmt_string. */
static void
print_short(size_t n_bytes, const char *block, const char *fmt_string)
{
	const unsigned short *item = (const unsigned short *) block;
	const unsigned short *const end = item + n_bytes / sizeof(*item);

	for (; item != end; item++) {
		unsigned value = *item;
		printf(fmt_string, value);
	}
}
/* Print block as an array of unsigned ints (n_bytes / sizeof(unsigned)
 * items; a trailing partial item is ignored), formatting every value
 * with fmt_string. Signedness of the output is decided by fmt_string. */
static void
print_int(size_t n_bytes, const char *block, const char *fmt_string)
{
	const unsigned *item = (const unsigned *) block;
	const unsigned *const end = item + n_bytes / sizeof(*item);

	for (; item != end; item++) {
		unsigned value = *item;
		printf(fmt_string, value);
	}
}
/* Where long is no wider than int, print_int does the job */
#if UINT_MAX == ULONG_MAX
# define print_long print_int
#else
/* Print block as an array of unsigned longs (n_bytes / sizeof(long)
 * items; a trailing partial item is ignored), formatting every value
 * with fmt_string. */
static void
print_long(size_t n_bytes, const char *block, const char *fmt_string)
{
	const unsigned long *item = (const unsigned long *) block;
	const unsigned long *const end = item + n_bytes / sizeof(*item);

	for (; item != end; item++) {
		unsigned long value = *item;
		printf(fmt_string, value);
	}
}
#endif
/* Where long long is no wider than long, print_long does the job */
#if ULONG_MAX == ULLONG_MAX
# define print_long_long print_long
#else
/* Print block as an array of unsigned long longs
 * (n_bytes / sizeof(ulonglong_t) items; a trailing partial item is
 * ignored), formatting every value with fmt_string. */
static void
print_long_long(size_t n_bytes, const char *block, const char *fmt_string)
{
	const ulonglong_t *item = (const ulonglong_t *) block;
	const ulonglong_t *const end = item + n_bytes / sizeof(*item);

	for (; item != end; item++) {
		ulonglong_t value = *item;
		printf(fmt_string, value);
	}
}
#endif
/* Print block as an array of floats (n_bytes / sizeof(float) items;
 * a trailing partial item is ignored), formatting every value with
 * fmt_string. As with any variadic call, each float reaches printf
 * promoted to double. */
static void
print_float(size_t n_bytes, const char *block, const char *fmt_string)
{
	const float *item = (const float *) block;
	const float *const end = item + n_bytes / sizeof(*item);

	for (; item != end; item++) {
		float value = *item;
		printf(fmt_string, value);
	}
}
/* Print block as an array of doubles (n_bytes / sizeof(double) items;
 * a trailing partial item is ignored), formatting every value with
 * fmt_string. */
static void
print_double(size_t n_bytes, const char *block, const char *fmt_string)
{
	const double *item = (const double *) block;
	const double *const end = item + n_bytes / sizeof(*item);

	for (; item != end; item++) {
		double value = *item;
		printf(fmt_string, value);
	}
}
395 static void
396 print_long_double(size_t n_bytes, const char *block, const char *fmt_string)
398 n_bytes /= sizeof(longdouble_t);
399 while (n_bytes--) {
400 longdouble_t tmp = *(longdouble_t *) block;
401 printf(fmt_string, tmp);
402 block += sizeof(longdouble_t);
406 /* print_[named]_ascii are optimized for speed.
407 * Remember, someday you may want to pump gigabytes through this thing.
408 * Saving a dozen of .text bytes here is counter-productive */
410 static void
411 print_named_ascii(size_t n_bytes, const char *block,
412 const char *unused_fmt_string UNUSED_PARAM)
414 /* Names for some non-printing characters. */
415 static const char charname[33][3] ALIGN1 = {
416 "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
417 " bs", " ht", " nl", " vt", " ff", " cr", " so", " si",
418 "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
419 "can", " em", "sub", "esc", " fs", " gs", " rs", " us",
420 " sp"
422 // buf[N] pos: 01234 56789
423 char buf[12] = " x\0 xxx\0";
424 // [12] because we take three 32bit stack slots anyway, and
425 // gcc is too dumb to initialize with constant stores,
426 // it copies initializer from rodata. Oh well.
427 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65410
429 while (n_bytes--) {
430 unsigned masked_c = *(unsigned char *) block++;
432 masked_c &= 0x7f;
433 if (masked_c == 0x7f) {
434 fputs_stdout(" del");
435 continue;
437 if (masked_c > ' ') {
438 buf[3] = masked_c;
439 fputs_stdout(buf);
440 continue;
442 /* Why? Because printf(" %3.3s") is much slower... */
443 buf[6] = charname[masked_c][0];
444 buf[7] = charname[masked_c][1];
445 buf[8] = charname[masked_c][2];
446 fputs_stdout(buf+5);
450 static void
451 print_ascii(size_t n_bytes, const char *block,
452 const char *unused_fmt_string UNUSED_PARAM)
454 // buf[N] pos: 01234 56789
455 char buf[12] = " x\0 xxx\0";
457 while (n_bytes--) {
458 const char *s;
459 unsigned c = *(unsigned char *) block++;
461 if (ISPRINT(c)) {
462 buf[3] = c;
463 fputs_stdout(buf);
464 continue;
466 switch (c) {
467 case '\0':
468 s = " \\0";
469 break;
470 case '\007':
471 s = " \\a";
472 break;
473 case '\b':
474 s = " \\b";
475 break;
476 case '\f':
477 s = " \\f";
478 break;
479 case '\n':
480 s = " \\n";
481 break;
482 case '\r':
483 s = " \\r";
484 break;
485 case '\t':
486 s = " \\t";
487 break;
488 case '\v':
489 s = " \\v";
490 break;
491 default:
492 buf[6] = (c >> 6 & 3) + '0';
493 buf[7] = (c >> 3 & 7) + '0';
494 buf[8] = (c & 7) + '0';
495 s = buf + 5;
497 fputs_stdout(s);
501 /* Given a list of one or more input filenames FILE_LIST, set the global
502 file pointer IN_STREAM and the global string INPUT_FILENAME to the
503 first one that can be successfully opened. Modify FILE_LIST to
504 reference the next filename in the list. A file name of "-" is
505 interpreted as standard input. If any file open fails, give an error
506 message and return nonzero. */
508 static void
509 open_next_file(void)
511 while (1) {
512 if (!*G.file_list)
513 return;
514 G.in_stream = fopen_or_warn_stdin(*G.file_list++);
515 if (G.in_stream) {
516 break;
518 G.exit_code = 1;
521 if ((option_mask32 & (OPT_N|OPT_S)) == OPT_N)
522 setbuf(G.in_stream, NULL);
525 /* Test whether there have been errors on in_stream, and close it if
526 it is not standard input. Return nonzero if there has been an error
527 on in_stream or stdout; return zero otherwise. This function will
528 report more than one error only if both a read and a write error
529 have occurred. IN_ERRNO, if nonzero, is the error number
530 corresponding to the most recent action for IN_STREAM. */
532 static void
533 check_and_close(void)
535 if (G.in_stream) {
536 if (ferror(G.in_stream)) {
537 bb_error_msg("%s: read error", (G.in_stream == stdin)
538 ? bb_msg_standard_input
539 : G.file_list[-1]
541 G.exit_code = 1;
543 fclose_if_not_stdin(G.in_stream);
544 G.in_stream = NULL;
547 if (ferror(stdout)) {
548 bb_simple_error_msg_and_die(bb_msg_write_error);
552 /* If S points to a single valid modern od format string, put
553 a description of that format in *TSPEC, return pointer to
554 character following the just-decoded format.
555 For example, if S were "d4afL", we will return a rtp to "afL"
556 and *TSPEC would be
558 fmt = SIGNED_DECIMAL;
559 size = INT or LONG; (whichever integral_type_size[4] resolves to)
560 print_function = print_int; (assuming size == INT)
561 fmt_string = "%011d%c";
563 S_ORIG is solely for reporting errors. It should be the full format
564 string argument. */
566 static NOINLINE const char *
567 decode_one_format(const char *s_orig, const char *s, struct tspec *tspec)
569 enum size_spec size_spec;
570 unsigned size;
571 enum output_format fmt;
572 const char *p;
573 char *end;
574 char *fmt_string = NULL;
575 void (*print_function) (size_t, const char *, const char *);
576 unsigned c;
577 unsigned field_width = 0;
578 int pos;
580 switch (*s) {
581 case 'd':
582 case 'o':
583 case 'u':
584 case 'x': {
585 static const char CSIL[] ALIGN1 = "CSIL";
587 c = *s++;
588 p = strchr(CSIL, *s);
589 /* if *s == NUL, p != NULL! Testcase: "od -tx" */
590 if (!p || *p == '\0') {
591 size = sizeof(int);
592 if (isdigit(s[0])) {
593 size = bb_strtou(s, &end, 0);
594 if (errno == ERANGE
595 || MAX_INTEGRAL_TYPE_SIZE < size
596 || integral_type_size[size] == NO_SIZE
598 bb_error_msg_and_die("invalid type string '%s'; "
599 "%u-byte %s type is not supported",
600 s_orig, size, "integral");
602 s = end;
604 } else {
605 static const uint8_t CSIL_sizeof[4] = {
606 sizeof(char),
607 sizeof(short),
608 sizeof(int),
609 sizeof(long),
611 size = CSIL_sizeof[p - CSIL];
612 s++; /* skip C/S/I/L */
615 #define ISPEC_TO_FORMAT(Spec, Min_format, Long_format, Max_format) \
616 ((Spec) == LONG_LONG ? (Max_format) \
617 : ((Spec) == LONG ? (Long_format) : (Min_format)))
619 #define FMT_BYTES_ALLOCATED 9
620 size_spec = integral_type_size[size];
623 static const char doux[] ALIGN1 = "doux";
624 static const char doux_fmt_letter[][4] = {
625 "lld", "llo", "llu", "llx"
627 static const enum output_format doux_fmt[] = {
628 SIGNED_DECIMAL,
629 OCTAL,
630 UNSIGNED_DECIMAL,
631 HEXADECIMAL,
633 static const uint8_t *const doux_bytes_to_XXX[] = {
634 bytes_to_signed_dec_digits,
635 bytes_to_oct_digits,
636 bytes_to_unsigned_dec_digits,
637 bytes_to_hex_digits,
639 static const char doux_fmtstring[][sizeof(" %%0%u%s")] ALIGN1 = {
640 " %%%u%s",
641 " %%0%u%s",
642 " %%%u%s",
643 " %%0%u%s",
646 pos = strchr(doux, c) - doux;
647 fmt = doux_fmt[pos];
648 field_width = doux_bytes_to_XXX[pos][size];
649 p = doux_fmt_letter[pos] + 2;
650 if (size_spec == LONG) p--;
651 if (size_spec == LONG_LONG) p -= 2;
652 fmt_string = xasprintf(doux_fmtstring[pos], field_width, p);
655 switch (size_spec) {
656 case CHAR:
657 print_function = (fmt == SIGNED_DECIMAL
658 ? print_s_char
659 : print_char);
660 break;
661 case SHORT:
662 print_function = (fmt == SIGNED_DECIMAL
663 ? print_s_short
664 : print_short);
665 break;
666 case INT:
667 print_function = print_int;
668 break;
669 case LONG:
670 print_function = print_long;
671 break;
672 default: /* case LONG_LONG: */
673 print_function = print_long_long;
674 break;
676 break;
679 case 'f': {
680 static const char FDL[] ALIGN1 = "FDL";
682 fmt = FLOATING_POINT;
683 ++s;
684 p = strchr(FDL, *s);
685 if (!p || *p == '\0') {
686 size = sizeof(double);
687 if (isdigit(s[0])) {
688 size = bb_strtou(s, &end, 0);
689 if (errno == ERANGE || size > MAX_FP_TYPE_SIZE
690 || fp_type_size[size] == NO_SIZE
692 bb_error_msg_and_die("invalid type string '%s'; "
693 "%u-byte %s type is not supported",
694 s_orig, size, "floating point");
696 s = end;
698 } else {
699 static const uint8_t FDL_sizeof[] = {
700 sizeof(float),
701 sizeof(double),
702 sizeof(longdouble_t),
705 size = FDL_sizeof[p - FDL];
706 s++; /* skip F/D/L */
709 size_spec = fp_type_size[size];
711 switch (size_spec) {
712 case FLOAT_SINGLE:
713 print_function = print_float;
714 field_width = FLT_DIG + 8;
715 /* Don't use %#e; not all systems support it. */
716 fmt_string = xasprintf(" %%%d.%de", field_width, FLT_DIG);
717 break;
718 case FLOAT_DOUBLE:
719 print_function = print_double;
720 field_width = DBL_DIG + 8;
721 fmt_string = xasprintf(" %%%d.%de", field_width, DBL_DIG);
722 break;
723 default: /* case FLOAT_LONG_DOUBLE: */
724 print_function = print_long_double;
725 field_width = LDBL_DIG + 8;
726 fmt_string = xasprintf(" %%%d.%dLe", field_width, LDBL_DIG);
727 break;
729 break;
732 case 'a':
733 ++s;
734 fmt = NAMED_CHARACTER;
735 size_spec = CHAR;
736 print_function = print_named_ascii;
737 field_width = 3;
738 break;
739 case 'c':
740 ++s;
741 fmt = CHARACTER;
742 size_spec = CHAR;
743 print_function = print_ascii;
744 field_width = 3;
745 break;
746 default:
747 bb_error_msg_and_die("invalid character '%c' "
748 "in type string '%s'", *s, s_orig);
751 tspec->size = size_spec;
752 tspec->fmt = fmt;
753 tspec->print_function = print_function;
754 tspec->fmt_string = fmt_string;
756 tspec->field_width = field_width;
757 tspec->hexl_mode_trailer = (*s == 'z');
758 if (tspec->hexl_mode_trailer)
759 s++;
761 return s;
764 /* Decode the modern od format string S. Append the decoded
765 representation to the global array SPEC, reallocating SPEC if
766 necessary. */
768 static void
769 decode_format_string(const char *s)
771 const char *s_orig = s;
773 while (*s != '\0') {
774 struct tspec tspec;
775 const char *next;
777 next = decode_one_format(s_orig, s, &tspec);
779 assert(s != next);
780 s = next;
781 G.spec = xrealloc_vector(G.spec, 4, G.n_specs);
782 memcpy(&G.spec[G.n_specs], &tspec, sizeof(G.spec[0]));
783 G.n_specs++;
787 /* Given a list of one or more input filenames FILE_LIST, set the global
788 file pointer IN_STREAM to position N_SKIP in the concatenation of
789 those files. If any file operation fails or if there are fewer than
790 N_SKIP bytes in the combined input, give an error message and return
791 nonzero. When possible, use seek rather than read operations to
792 advance IN_STREAM. */
794 static void
795 skip(off_t n_skip)
797 if (n_skip == 0)
798 return;
800 while (G.in_stream) { /* !EOF */
801 struct stat file_stats;
803 /* First try seeking. For large offsets, this extra work is
804 worthwhile. If the offset is below some threshold it may be
805 more efficient to move the pointer by reading. There are two
806 issues when trying to seek:
807 - the file must be seekable.
808 - before seeking to the specified position, make sure
809 that the new position is in the current file.
810 Try to do that by getting file's size using fstat.
811 But that will work only for regular files. */
813 /* The st_size field is valid only for regular files
814 (and for symbolic links, which cannot occur here).
815 If the number of bytes left to skip is at least
816 as large as the size of the current file, we can
817 decrement n_skip and go on to the next file. */
818 if (fstat(fileno(G.in_stream), &file_stats) == 0
819 && S_ISREG(file_stats.st_mode) && file_stats.st_size > 0
821 if (file_stats.st_size < n_skip) {
822 n_skip -= file_stats.st_size;
823 /* take "check & close / open_next" route */
824 } else {
825 if (fseeko(G.in_stream, n_skip, SEEK_CUR) != 0)
826 G.exit_code = 1;
827 return;
829 } else {
830 /* If it's not a regular file with positive size,
831 position the file pointer by reading. */
832 char buf[1024];
833 size_t n_bytes_to_read = 1024;
834 size_t n_bytes_read;
836 while (n_skip > 0) {
837 if (n_skip < n_bytes_to_read)
838 n_bytes_to_read = n_skip;
839 n_bytes_read = fread(buf, 1, n_bytes_to_read, G.in_stream);
840 n_skip -= n_bytes_read;
841 if (n_bytes_read != n_bytes_to_read)
842 break; /* EOF on this file or error */
845 if (n_skip == 0)
846 return;
848 check_and_close();
849 open_next_file();
852 if (n_skip)
853 bb_simple_error_msg_and_die("can't skip past end of combined input");
857 typedef void FN_format_address(off_t address, char c);
859 static void
860 format_address_none(off_t address UNUSED_PARAM, char c UNUSED_PARAM)
864 static void
865 format_address_std(off_t address, char c)
867 /* Corresponds to 'c' */
868 G.address_fmt[sizeof(G.address_fmt)-2] = c;
869 printf(G.address_fmt, address);
872 #if ENABLE_LONG_OPTS
873 /* only used with --traditional */
874 static void
875 format_address_paren(off_t address, char c)
877 putchar('(');
878 format_address_std(address, ')');
879 if (c) putchar(c);
882 static void
883 format_address_label(off_t address, char c)
885 format_address_std(address, ' ');
886 format_address_paren(address + G_pseudo_offset, c);
888 #endif
/* Print the N_BYTES bytes of BLOCK as text between '>' and '<',
 * substituting '.' for every byte that is not printable ASCII. */
static void
dump_hexl_mode_trailer(size_t n_bytes, const char *block)
{
	const unsigned char *byte = (const unsigned char *) block;
	const unsigned char *const end = byte + n_bytes;

	fputs_stdout(" >");
	for (; byte != end; byte++) {
		unsigned c = *byte;

		if (!ISPRINT(c))
			c = '.';
		putchar(c);
	}
	putchar('<');
}
902 /* Write N_BYTES bytes from CURR_BLOCK to standard output once for each
903 of the N_SPEC format specs. CURRENT_OFFSET is the byte address of
904 CURR_BLOCK in the concatenation of input files, and it is printed
905 (optionally) only before the output line associated with the first
906 format spec. When duplicate blocks are being abbreviated, the output
907 for a sequence of identical input blocks is the output for the first
908 block followed by an asterisk alone on a line. It is valid to compare
909 the blocks PREV_BLOCK and CURR_BLOCK only when N_BYTES == BYTES_PER_BLOCK.
910 That condition may be false only for the last input block -- and then
911 only when it has not been padded to length BYTES_PER_BLOCK. */
913 static void
914 write_block(off_t current_offset, size_t n_bytes,
915 const char *prev_block, const char *curr_block)
917 unsigned i;
919 if (!(option_mask32 & OPT_v)
920 && G.not_first
921 && n_bytes == G.bytes_per_block
922 && memcmp(prev_block, curr_block, G.bytes_per_block) == 0
924 if (G.prev_pair_equal) {
925 /* The two preceding blocks were equal, and the current
926 block is the same as the last one, so print nothing. */
927 } else {
928 puts("*");
929 G.prev_pair_equal = 1;
931 } else {
932 G.not_first = 1;
933 G.prev_pair_equal = 0;
934 for (i = 0; i < G.n_specs; i++) {
935 if (i == 0)
936 G.format_address(current_offset, '\0');
937 else
938 printf("%*s", address_pad_len_char - '0', "");
939 (*G.spec[i].print_function) (n_bytes, curr_block, G.spec[i].fmt_string);
940 if (G.spec[i].hexl_mode_trailer) {
941 /* space-pad out to full line width, then dump the trailer */
942 unsigned datum_width = width_bytes[G.spec[i].size];
943 unsigned blank_fields = (G.bytes_per_block - n_bytes) / datum_width;
944 unsigned field_width = G.spec[i].field_width + 1;
945 printf("%*s", blank_fields * field_width, "");
946 dump_hexl_mode_trailer(n_bytes, curr_block);
948 putchar('\n');
953 static void
954 read_block(size_t n, char *block, size_t *n_bytes_in_buffer)
956 assert(0 < n && n <= G.bytes_per_block);
958 *n_bytes_in_buffer = 0;
960 if (n == 0)
961 return;
963 while (G.in_stream != NULL) { /* EOF. */
964 size_t n_needed;
965 size_t n_read;
967 n_needed = n - *n_bytes_in_buffer;
968 n_read = fread(block + *n_bytes_in_buffer, 1, n_needed, G.in_stream);
969 *n_bytes_in_buffer += n_read;
970 if (n_read == n_needed)
971 break;
972 /* error check is done in check_and_close */
973 check_and_close();
974 open_next_file();
978 /* Return the least common multiple of the sizes associated
979 with the format specs. */
981 static int
982 get_lcm(void)
984 size_t i;
985 int l_c_m = 1;
987 for (i = 0; i < G.n_specs; i++)
988 l_c_m = lcm(l_c_m, width_bytes[(int) G.spec[i].size]);
989 return l_c_m;
992 /* Read a chunk of size BYTES_PER_BLOCK from the input files, write the
993 formatted block to standard output, and repeat until the specified
994 maximum number of bytes has been read or until all input has been
995 processed. If the last block read is smaller than BYTES_PER_BLOCK
996 and its size is not a multiple of the size associated with a format
997 spec, extend the input block with zero bytes until its length is a
998 multiple of all format spec sizes. Write the final block. Finally,
999 write on a line by itself the offset of the byte after the last byte
1000 read. */
1002 static void
1003 dump(off_t current_offset, off_t end_offset)
1005 char *block[2];
1006 int idx;
1007 size_t n_bytes_read;
1009 block[0] = xmalloc(2 * G.bytes_per_block);
1010 block[1] = block[0] + G.bytes_per_block;
1012 idx = 0;
1013 if (option_mask32 & OPT_N) {
1014 while (1) {
1015 size_t n_needed;
1016 if (current_offset >= end_offset) {
1017 n_bytes_read = 0;
1018 break;
1020 n_needed = MIN(end_offset - current_offset, (off_t) G.bytes_per_block);
1021 read_block(n_needed, block[idx], &n_bytes_read);
1022 if (n_bytes_read < G.bytes_per_block)
1023 break;
1024 assert(n_bytes_read == G.bytes_per_block);
1025 write_block(current_offset, n_bytes_read, block[idx ^ 1], block[idx]);
1026 current_offset += n_bytes_read;
1027 idx ^= 1;
1029 } else {
1030 while (1) {
1031 read_block(G.bytes_per_block, block[idx], &n_bytes_read);
1032 if (n_bytes_read < G.bytes_per_block)
1033 break;
1034 assert(n_bytes_read == G.bytes_per_block);
1035 write_block(current_offset, n_bytes_read, block[idx ^ 1], block[idx]);
1036 current_offset += n_bytes_read;
1037 idx ^= 1;
1041 if (n_bytes_read > 0) {
1042 int l_c_m;
1043 size_t bytes_to_write;
1045 l_c_m = get_lcm();
1047 /* Make bytes_to_write the smallest multiple of l_c_m that
1048 is at least as large as n_bytes_read. */
1049 bytes_to_write = l_c_m * ((n_bytes_read + l_c_m - 1) / l_c_m);
1051 memset(block[idx] + n_bytes_read, 0, bytes_to_write - n_bytes_read);
1052 write_block(current_offset, bytes_to_write,
1053 block[idx ^ 1], block[idx]);
1054 current_offset += n_bytes_read;
1057 G.format_address(current_offset, '\n');
1059 if ((option_mask32 & OPT_N) && current_offset >= end_offset)
1060 check_and_close();
1062 free(block[0]);
/* Description of read_block(), defined earlier in this file:
   Read N bytes into BLOCK from the concatenation of the input files
   named in the global array FILE_LIST. On the first call to this
   function, the global variable IN_STREAM is expected to be an open
   stream associated with the current input file. If all N bytes
   cannot be read from IN_STREAM, close IN_STREAM and open the next
   file from FILE_LIST, then try to read the remaining bytes from the
   newly opened file. Repeat if necessary until EOF is reached for the
   last file in FILE_LIST. On subsequent calls, don't modify BLOCK.
   Set *N_BYTES_IN_BUFFER to the number of bytes read. If a read error
   occurs, it will be detected through ferror when the stream is about
   to be closed; a message is given but reading continues as usual.
   The function itself returns no value. */
1079 /* STRINGS mode. Find each "string constant" in the input.
1080 A string constant is a run of at least 'string_min' ASCII
1081 graphic (or formatting) characters terminated by a null.
1082 Based on a function written by Richard Stallman for a
1083 traditional version of od. */
1085 static void
1086 dump_strings(off_t address, off_t end_offset)
1088 unsigned bufsize = MAX(100, G.string_min);
1089 unsigned char *buf = xmalloc(bufsize);
1091 while (1) {
1092 size_t i;
1093 int c;
1095 /* See if the next 'G.string_min' chars are all printing chars. */
1096 tryline:
1097 if ((option_mask32 & OPT_N) && (end_offset - G.string_min <= address))
1098 break;
1099 i = 0;
1100 while (!(option_mask32 & OPT_N) || address < end_offset) {
1101 if (i == bufsize) {
1102 bufsize += bufsize/8;
1103 buf = xrealloc(buf, bufsize);
1106 while (G.in_stream) { /* !EOF */
1107 c = fgetc(G.in_stream);
1108 if (c != EOF)
1109 goto got_char;
1110 check_and_close();
1111 open_next_file();
1113 /* EOF */
1114 goto ret;
1115 got_char:
1116 address++;
1117 if (!c)
1118 break;
1119 if (!ISPRINT(c))
1120 goto tryline; /* It isn't; give up on this string. */
1121 buf[i++] = c; /* String continues; store it all. */
1124 if (i < G.string_min) /* Too short! */
1125 goto tryline;
1127 /* If we get here, the string is all printable and NUL-terminated */
1128 buf[i] = 0;
1129 G.format_address(address - i - 1, ' ');
1131 for (i = 0; (c = buf[i]); i++) {
1132 switch (c) {
1133 case '\007': fputs_stdout("\\a"); break;
1134 case '\b': fputs_stdout("\\b"); break;
1135 case '\f': fputs_stdout("\\f"); break;
1136 case '\n': fputs_stdout("\\n"); break;
1137 case '\r': fputs_stdout("\\r"); break;
1138 case '\t': fputs_stdout("\\t"); break;
1139 case '\v': fputs_stdout("\\v"); break;
1140 default: putchar(c);
1143 putchar('\n');
1146 /* We reach this point only if we search through
1147 (max_bytes_to_format - G.string_min) bytes before reaching EOF. */
1148 check_and_close();
1149 ret:
1150 free(buf);
#if ENABLE_LONG_OPTS
/* Parse S as a traditional od offset: an optional leading '+', then
 * a number which must start with a digit. On success the value is
 * stored in *OFFSET.
 *
 * Radix selection:
 *   - the string contains a '.'      -> decimal
 *   - the string starts with 0x / 0X -> hexadecimal
 *   - anything else                  -> octal
 * The "b" (512) and "B" (1024) multiplier suffixes are passed to
 * xstrtooff_sfx().
 *
 * Returns 0 if S does not start with a digit (after the optional '+'),
 * otherwise nonzero when the parsed offset is non-negative.
 * NOTE(review): how xstrtooff_sfx() reacts to a malformed number is not
 * visible here - confirm before relying on a zero return for bad input.
 */
static int
parse_old_offset(const char *s, off_t *offset)
{
	static const struct suffix_mult Bb[] ALIGN_SUFFIX = {
		{ "B", 1024 },
		{ "b", 512 },
		{ "", 0 }
	};
	char *dot;
	int base;

	/* An optional leading '+' is allowed and ignored */
	if (*s == '+')
		s++;
	/* Anything not starting with a digit is not an offset */
	if (!isdigit(*s))
		return 0;

	dot = strchr(s, '.');
	if (dot) {
		/* Cheating: cut the string at the '.' for the duration of
		 * the conversion; the character is put back below */
		*dot = '\0';
		base = 10;
	} else if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
		base = 16;
	} else {
		base = 8;
	}

	*offset = xstrtooff_sfx(s, base, Bb);
	if (dot)
		*dot = '.';

	return *offset >= 0;
}
#endif
1190 int od_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1191 int od_main(int argc UNUSED_PARAM, char **argv)
1193 #if ENABLE_LONG_OPTS
1194 static const char od_longopts[] ALIGN1 =
1195 "skip-bytes\0" Required_argument "j"
1196 "address-radix\0" Required_argument "A"
1197 "read-bytes\0" Required_argument "N"
1198 "format\0" Required_argument "t"
1199 "output-duplicates\0" No_argument "v"
1200 /* Yes, it's true: -S NUM, but --strings[=NUM]!
1201 * that is, NUM is mandatory for -S but optional for --strings!
1203 "strings\0" Optional_argument "S"
1204 "width\0" Optional_argument "w"
1205 "traditional\0" No_argument "\xff"
1207 #endif
1208 const char *str_A, *str_N, *str_j, *str_S = "3";
1209 llist_t *lst_t = NULL;
1210 unsigned opt;
1211 int l_c_m;
1212 /* The number of input bytes to skip before formatting and writing. */
1213 off_t n_bytes_to_skip = 0;
1214 /* The offset of the first byte after the last byte to be formatted. */
1215 off_t end_offset = 0;
1216 /* The maximum number of bytes that will be formatted. */
1217 off_t max_bytes_to_format = 0;
1219 INIT_G();
1221 /*G.spec = NULL; - already is */
1222 G.format_address = format_address_std;
1223 address_base_char = 'o';
1224 address_pad_len_char = '7';
1226 /* Parse command line */
1227 opt = OD_GETOPT32();
1228 argv += optind;
1229 if (opt & OPT_A) {
1230 static const char doxn[] ALIGN1 = "doxn";
1231 static const char doxn_address_base_char[] ALIGN1 = {
1232 'u', 'o', 'x', /* '?' fourth one is not important */
1234 static const uint8_t doxn_address_pad_len_char[] ALIGN1 = {
1235 '7', '7', '6', /* '?' */
1237 char *p;
1238 int pos;
1239 p = strchr(doxn, str_A[0]);
1240 if (!p)
1241 bb_error_msg_and_die("bad output address radix "
1242 "'%c' (must be [doxn])", str_A[0]);
1243 pos = p - doxn;
1244 if (pos == 3) G.format_address = format_address_none;
1245 address_base_char = doxn_address_base_char[pos];
1246 address_pad_len_char = doxn_address_pad_len_char[pos];
1248 if (opt & OPT_N) {
1249 max_bytes_to_format = xstrtooff_sfx(str_N, 0, bkm_suffixes);
1252 if (opt & OPT_a) decode_format_string("a");
1253 if (opt & OPT_b) decode_format_string("oC");
1254 if (opt & OPT_c) decode_format_string("c");
1255 if (opt & OPT_d) decode_format_string("u2");
1256 if (opt & OPT_D) decode_format_string("uI");
1257 if (opt & OPT_f) decode_format_string("fF");
1258 if (opt & (OPT_h|OPT_x)) decode_format_string("x2");
1259 if (opt & (OPT_H|OPT_X)) decode_format_string("xI");
1260 /* -I,L,l: depend on word width of the arch (what is "long"?) */
1261 #if ULONG_MAX > 0xffffffff
1262 if (opt & OPT_i) decode_format_string("dI");
1263 if (opt & (OPT_I|OPT_l|OPT_L)) decode_format_string("dL");
1264 #else
1265 /* 32-bit arch: -I,L,l are the same as -i */
1266 if (opt & (OPT_i|OPT_I|OPT_l|OPT_L)) decode_format_string("dI");
1267 #endif
1268 if (opt & OPT_j) n_bytes_to_skip = xstrtooff_sfx(str_j, 0, bkm_suffixes);
1269 if (opt & (OPT_o|OPT_B)) decode_format_string("o2");
1270 if (opt & OPT_O) decode_format_string("oI");
1271 while (lst_t) {
1272 decode_format_string(llist_pop(&lst_t));
1274 if (opt & OPT_s) decode_format_string("d2");
1275 if (opt & OPT_S) {
1276 G.string_min = xstrtou_sfx(str_S, 0, bkm_suffixes);
1279 // Bloat:
1280 //if ((option_mask32 & OPT_S) && G.n_specs > 0)
1281 // bb_error_msg_and_die("no type may be specified when dumping strings");
1283 /* If the --traditional option is used, there may be from
1284 * 0 to 3 remaining command line arguments; handle each case
1285 * separately.
1286 * od [FILE] [[+]OFFSET[.][b] [[+]LABEL[.][b]]]
1287 * The offset and pseudo_start have the same syntax.
1289 * FIXME: POSIX 1003.1-2001 with XSI requires support for the
1290 * traditional syntax even if --traditional is not given. */
1292 #if ENABLE_LONG_OPTS
1293 if (opt & OPT_traditional) {
1294 if (argv[0]) {
1295 off_t pseudo_start = -1;
1296 off_t o1, o2;
1298 if (!argv[1]) { /* one arg */
1299 if (parse_old_offset(argv[0], &o1)) {
1300 /* od --traditional OFFSET */
1301 n_bytes_to_skip = o1;
1302 argv++;
1304 /* od --traditional FILE */
1305 } else if (!argv[2]) { /* two args */
1306 if (parse_old_offset(argv[0], &o1)
1307 && parse_old_offset(argv[1], &o2)
1309 /* od --traditional OFFSET LABEL */
1310 n_bytes_to_skip = o1;
1311 pseudo_start = o2;
1312 argv += 2;
1313 } else if (parse_old_offset(argv[1], &o2)) {
1314 /* od --traditional FILE OFFSET */
1315 n_bytes_to_skip = o2;
1316 argv[1] = NULL;
1317 } else {
1318 bb_error_msg_and_die("invalid second argument '%s'", argv[1]);
1320 } else if (!argv[3]) { /* three args */
1321 if (parse_old_offset(argv[1], &o1)
1322 && parse_old_offset(argv[2], &o2)
1324 /* od --traditional FILE OFFSET LABEL */
1325 n_bytes_to_skip = o1;
1326 pseudo_start = o2;
1327 argv[1] = NULL;
1328 } else {
1329 bb_simple_error_msg_and_die("the last two arguments must be offsets");
1331 } else { /* >3 args */
1332 bb_simple_error_msg_and_die("too many arguments");
1335 if (pseudo_start >= 0) {
1336 if (G.format_address == format_address_none) {
1337 address_base_char = 'o';
1338 address_pad_len_char = '7';
1339 G.format_address = format_address_paren;
1340 } else {
1341 G.format_address = format_address_label;
1343 G_pseudo_offset = pseudo_start - n_bytes_to_skip;
1346 /* else: od --traditional (without args) */
1348 #endif
1350 if (option_mask32 & OPT_N) {
1351 end_offset = n_bytes_to_skip + max_bytes_to_format;
1352 if (end_offset < n_bytes_to_skip)
1353 bb_simple_error_msg_and_die("SKIP + SIZE is too large");
1356 if (G.n_specs == 0) {
1357 decode_format_string("o2");
1358 /*G.n_specs = 1; - done by decode_format_string */
1361 /* If no files were listed on the command line,
1362 set the global pointer FILE_LIST so that it
1363 references the null-terminated list of one name: "-". */
1364 G.file_list = bb_argv_dash;
1365 if (argv[0]) {
1366 /* Set the global pointer FILE_LIST so that it
1367 references the first file-argument on the command-line. */
1368 G.file_list = (char const *const *) argv;
1371 /* Open the first input file */
1372 open_next_file();
1373 /* Skip over any unwanted header bytes */
1374 skip(n_bytes_to_skip);
1375 if (!G.in_stream)
1376 return EXIT_FAILURE;
1378 /* Compute output block length */
1379 l_c_m = get_lcm();
1381 if (opt & OPT_w) { /* -w: width */
1382 if (!G.bytes_per_block || G.bytes_per_block % l_c_m != 0) {
1383 bb_error_msg("warning: invalid width %u; using %d instead",
1384 (unsigned)G.bytes_per_block, l_c_m);
1385 G.bytes_per_block = l_c_m;
1387 } else {
1388 G.bytes_per_block = l_c_m;
1389 if (l_c_m < DEFAULT_BYTES_PER_BLOCK)
1390 G.bytes_per_block *= DEFAULT_BYTES_PER_BLOCK / l_c_m;
1393 #ifdef DEBUG
1395 int i;
1396 for (i = 0; i < G.n_specs; i++) {
1397 printf("%d: fmt='%s' width=%d\n",
1398 i, G.spec[i].fmt_string,
1399 width_bytes[G.spec[i].size]);
1402 #endif
1404 if (option_mask32 & OPT_S)
1405 dump_strings(n_bytes_to_skip, end_offset);
1406 else
1407 dump(n_bytes_to_skip, end_offset);
1409 if (fclose(stdin))
1410 bb_simple_perror_msg_and_die(bb_msg_standard_input);
1412 return G.exit_code;