src/od.c

   1 /* od -- dump files in octal and other formats
   2    Copyright (C) 1992-2024 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Jim Meyering.  */
  18
  19 #include <config.h>
  20
  21 #include <ctype.h>
  22 #include <float.h>
  23 #include <stdio.h>
  24 #include <getopt.h>
  25 #include <sys/types.h>
  26 #include "system.h"
  27 #include "argmatch.h"
  28 #include "assure.h"
  29 #include "ftoastr.h"
  30 #include "quote.h"
  31 #include "stat-size.h"
  32 #include "xbinary-io.h"
  33 #include "xprintf.h"
  34 #include "xstrtol.h"
  35 #include "xstrtol-error.h"
  36
  37 /* The official name of this program (e.g., no 'g' prefix).  */
  38 #define PROGRAM_NAME "od"
  39
  40 #define AUTHORS proper_name ("Jim Meyering")
  41
  42 /* The default number of input bytes per output line.  */
  43 #define DEFAULT_BYTES_PER_BLOCK 16
  44
  45 #if HAVE_UNSIGNED_LONG_LONG_INT
  46 typedef unsigned long long int unsigned_long_long_int;
  47 #else
  48 /* This is just a place-holder to avoid a few '#if' directives.
  49    In this case, the type isn't actually used.  */
  50 typedef unsigned long int unsigned_long_long_int;
  51 #endif
  52
  53 #if FLOAT16_SUPPORTED
  54   /* Available since clang 6 (2018), and gcc 7 (2017).  */
  55   typedef _Float16 float16;
  56 #else
  57 # define FLOAT16_SUPPORTED 0
  58   /* This is just a place-holder to avoid a few '#if' directives.
  59      In this case, the type isn't actually used.  */
  60   typedef float float16;
  61 #endif
  62
  63 #if BF16_SUPPORTED
  64   /* Available since clang 11 (2020), and gcc 13 (2023). */
  65   typedef __bf16 bfloat16;
  66 #else
  67 # define BF16_SUPPORTED 0
  68   /* This is just a place-holder to avoid a few '#if' directives.
  69      In this case, the type isn't actually used.  */
  70   typedef float bfloat16;
  71 #endif
  72
  73 enum size_spec
  74   {
  75     NO_SIZE,
  76     CHAR,
  77     SHORT,
  78     INT,
  79     LONG,
  80     LONG_LONG,
  81     /* FIXME: add INTMAX support, too */
  82     FLOAT_HALF,
  83     FLOAT_SINGLE,
  84     FLOAT_DOUBLE,
  85     FLOAT_LONG_DOUBLE,
  86     N_SIZE_SPECS
  87   };
  88
  89 enum output_format
  90   {
  91     SIGNED_DECIMAL,
  92     UNSIGNED_DECIMAL,
  93     OCTAL,
  94     HEXADECIMAL,
  95     FLOATING_POINT,
  96     HFLOATING_POINT,
  97     BFLOATING_POINT,
  98     NAMED_CHARACTER,
  99     CHARACTER
 100   };
 101
 102 #define MAX_INTEGRAL_TYPE_SIZE sizeof (unsigned_long_long_int)
 103
 104 /* The maximum number of bytes needed for a format string, including
 105    the trailing nul.  Each format string expects a variable amount of
 106    padding (guaranteed to be at least 1 plus the field width), then an
 107    element that will be formatted in the field.  */
 108 enum
 109   {
 110     FMT_BYTES_ALLOCATED =
 111            (sizeof "%*.99" + 1
 112             + MAX (sizeof "ld",
 113                    MAX (sizeof "jd",
 114                         MAX (sizeof "jd",
 115                              MAX (sizeof "ju",
 116                                   sizeof "jx")))))
 117   };
 118
 119 /* Ensure that our choice for FMT_BYTES_ALLOCATED is reasonable.  */
 120 static_assert (MAX_INTEGRAL_TYPE_SIZE * CHAR_BIT / 3 <= 99);
 121
 122 /* Each output format specification (from '-t spec' or from
 123    old-style options) is represented by one of these structures.  */
 124 struct tspec
 125   {
 126     enum output_format fmt;
 127     enum size_spec size; /* Type of input object.  */
 128     /* FIELDS is the number of fields per line, BLANK is the number of
 129        fields to leave blank.  WIDTH is width of one field, excluding
 130        leading space, and PAD is total pad to divide among FIELDS.
 131        PAD is at least as large as FIELDS.  */
 132     void (*print_function) (size_t fields, size_t blank, void const *data,
 133                             char const *fmt, int width, int pad);
 134     char fmt_string[FMT_BYTES_ALLOCATED]; /* Of the style "%*d".  */
 135     bool hexl_mode_trailer;
 136     int field_width; /* Minimum width of a field, excluding leading space.  */
 137     int pad_width; /* Total padding to be divided among fields.  */
 138   };
 139
 140 /* Convert the number of 8-bit bytes of a binary representation to
 141    the number of characters (digits + sign if the type is signed)
 142    required to represent the same quantity in the specified base/type.
 143    For example, a 32-bit (4-byte) quantity may require a field width
 144    as wide as the following for these types:
 145    11   unsigned octal
 146    11   signed decimal
 147    10   unsigned decimal
 148    8    unsigned hexadecimal  */
 149
 150 static char const bytes_to_oct_digits[] =
 151 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
 152
 153 static char const bytes_to_signed_dec_digits[] =
 154 {1, 4, 6, 8, 11, 13, 16, 18, 20, 23, 25, 28, 30, 33, 35, 37, 40};
 155
 156 static char const bytes_to_unsigned_dec_digits[] =
 157 {0, 3, 5, 8, 10, 13, 15, 17, 20, 22, 25, 27, 29, 32, 34, 37, 39};
 158
 159 static char const bytes_to_hex_digits[] =
 160 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
 161
 162 /* It'll be a while before we see integral types wider than 16 bytes,
 163    but if/when it happens, this check will catch it.  Without this check,
 164    a wider type would provoke a buffer overrun.  */
 165 static_assert (MAX_INTEGRAL_TYPE_SIZE
 166                < ARRAY_CARDINALITY (bytes_to_hex_digits));
 167
 168 /* Make sure the other arrays have the same length.  */
 169 static_assert (sizeof bytes_to_oct_digits == sizeof bytes_to_signed_dec_digits);
 170 static_assert (sizeof bytes_to_oct_digits
 171                == sizeof bytes_to_unsigned_dec_digits);
 172 static_assert (sizeof bytes_to_oct_digits == sizeof bytes_to_hex_digits);
 173
 174 /* Convert enum size_spec to the size of the named type.  */
 175 static const int width_bytes[] =
 176 {
 177   -1,
 178   sizeof (char),
 179   sizeof (short int),
 180   sizeof (int),
 181   sizeof (long int),
 182   sizeof (unsigned_long_long_int),
 183 #if BF16_SUPPORTED
 184   sizeof (bfloat16),
 185 #else
 186   sizeof (float16),
 187 #endif
 188   sizeof (float),
 189   sizeof (double),
 190   sizeof (long double)
 191 };
 192
 193 /* Ensure that for each member of 'enum size_spec' there is an
 194    initializer in the width_bytes array.  */
 195 static_assert (ARRAY_CARDINALITY (width_bytes) == N_SIZE_SPECS);
 196
 197 /* Names for some non-printing characters.  */
 198 static char const charname[33][4] =
 199 {
 200   "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
 201   "bs", "ht", "nl", "vt", "ff", "cr", "so", "si",
 202   "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
 203   "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
 204   "sp"
 205 };
 206
 207 /* Address base (8, 10 or 16).  */
 208 static int address_base;
 209
 210 /* The number of octal digits required to represent the largest
 211    address value.  */
 212 #define MAX_ADDRESS_LENGTH \
 213   ((sizeof (uintmax_t) * CHAR_BIT + CHAR_BIT - 1) / 3)
 214
 215 /* Width of a normal address.  */
 216 static int address_pad_len;
 217
 218 /* Minimum length when detecting --strings.  */
 219 static idx_t string_min;
 220
 221 /* True when in --strings mode.  */
 222 static bool flag_dump_strings;
 223
 224 /* True if we should recognize the older non-option arguments
 225    that specified at most one file and optional arguments specifying
 226    offset and pseudo-start address.  */
 227 static bool traditional;
 228
 229 /* True if an old-style 'pseudo-address' was specified.  */
 230 static bool flag_pseudo_start;
 231
 232 /* The difference between the old-style pseudo starting address and
 233    the number of bytes to skip.  */
 234 static uintmax_t pseudo_offset;
 235
 236 /* Function that accepts an address and an optional following char,
 237    and prints the address and char to stdout.  */
 238 static void (*format_address) (uintmax_t, char);
 239
 240 /* The number of input bytes to skip before formatting and writing.  */
 241 static uintmax_t n_bytes_to_skip = 0;
 242
 243 /* When false, MAX_BYTES_TO_FORMAT and END_OFFSET are ignored, and all
 244    input is formatted.  */
 245 static bool limit_bytes_to_format = false;
 246
 247 /* The maximum number of bytes that will be formatted.  */
 248 static uintmax_t max_bytes_to_format;
 249
 250 /* The offset of the first byte after the last byte to be formatted.  */
 251 static uintmax_t end_offset;
 252
 253 /* When true and two or more consecutive blocks are equal, format
 254    only the first block and output an asterisk alone on the following
 255    line to indicate that identical blocks have been elided.  */
 256 static bool abbreviate_duplicate_blocks = true;
 257
 258 /* An array of specs describing how to format each input block.  */
 259 static struct tspec *spec;
 260
 261 /* The number of format specs.  */
 262 static idx_t n_specs;
 263
 264 /* The allocated length of SPEC.  */
 265 static idx_t n_specs_allocated;
 266
 267 /* The number of input bytes formatted per output line.  It must be
 268    a multiple of the least common multiple of the sizes associated with
 269    the specified output types.  It should be as large as possible, but
 270    no larger than 16 -- unless specified with the -w option.  */
 271 static size_t bytes_per_block;
 272
 273 /* Human-readable representation of *file_list (for error messages).
 274    It differs from file_list[-1] only when file_list[-1] is "-".  */
 275 static char const *input_filename;
 276
 277 /* A null-terminated list of the file-arguments from the command line.  */
 278 static char const *const *file_list;
 279
 280 /* Initializer for file_list if no file-arguments
 281    were specified on the command line.  */
 282 static char const *const default_file_list[] = {"-", nullptr};
 283
 284 /* The input stream associated with the current file.  */
 285 static FILE *in_stream;
 286
 287 /* If true, at least one of the files we read was standard input.  */
 288 static bool have_read_stdin;
 289
 290 /* Map the size in bytes to a type identifier.  */
 291 static enum size_spec integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1];
 292
 293 #define MAX_FP_TYPE_SIZE sizeof (long double)
 294 static enum size_spec fp_type_size[MAX_FP_TYPE_SIZE + 1];
 295
 296 #ifndef WORDS_BIGENDIAN
 297 # define WORDS_BIGENDIAN 0
 298 #endif
 299
 300 /* Use native endianness by default.  */
 301 static bool input_swap;
 302
 303 static char const short_options[] = "A:aBbcDdeFfHhIij:LlN:OoS:st:vw::Xx";
 304
 305 /* For long options that have no equivalent short option, use a
 306    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 307 enum
 308 {
 309   TRADITIONAL_OPTION = CHAR_MAX + 1,
 310   ENDIAN_OPTION,
 311 };
 312
 313 enum endian_type
 314 {
 315   endian_little,
 316   endian_big
 317 };
 318
 319 static char const *const endian_args[] =
 320 {
 321   "little", "big", nullptr
 322 };
 323
 324 static enum endian_type const endian_types[] =
 325 {
 326   endian_little, endian_big
 327 };
 328
 329 static struct option const long_options[] =
 330 {
 331   {"skip-bytes", required_argument, nullptr, 'j'},
 332   {"address-radix", required_argument, nullptr, 'A'},
 333   {"read-bytes", required_argument, nullptr, 'N'},
 334   {"format", required_argument, nullptr, 't'},
 335   {"output-duplicates", no_argument, nullptr, 'v'},
 336   {"strings", optional_argument, nullptr, 'S'},
 337   {"traditional", no_argument, nullptr, TRADITIONAL_OPTION},
 338   {"width", optional_argument, nullptr, 'w'},
 339   {"endian", required_argument, nullptr, ENDIAN_OPTION },
 340
 341   {GETOPT_HELP_OPTION_DECL},
 342   {GETOPT_VERSION_OPTION_DECL},
 343   {nullptr, 0, nullptr, 0}
 344 };
 345
 346 void
 347 usage (int status)
 348 {
 349   if (status != EXIT_SUCCESS)
 350     emit_try_help ();
 351   else
 352     {
 353       printf (_("\
 354 Usage: %s [OPTION]... [FILE]...\n\
 355   or:  %s [-abcdfilosx]... [FILE] [[+]OFFSET[.][b]]\n\
 356   or:  %s --traditional [OPTION]... [FILE] [[+]OFFSET[.][b] [+][LABEL][.][b]]\n\
 357 "),
 358               program_name, program_name, program_name);
 359       fputs (_("\n\
 360 Write an unambiguous representation, octal bytes by default,\n\
 361 of FILE to standard output.  With more than one FILE argument,\n\
 362 concatenate them in the listed order to form the input.\n\
 363 "), stdout);
 364
 365       emit_stdin_note ();
 366
 367       fputs (_("\
 368 \n\
 369 If first and second call formats both apply, the second format is assumed\n\
 370 if the last operand begins with + or (if there are 2 operands) a digit.\n\
 371 An OFFSET operand means -j OFFSET.  LABEL is the pseudo-address\n\
 372 at first byte printed, incremented when dump is progressing.\n\
 373 For OFFSET and LABEL, a 0x or 0X prefix indicates hexadecimal;\n\
 374 suffixes may be . for octal and b for multiply by 512.\n\
 375 "), stdout);
 376
 377       emit_mandatory_arg_note ();
 378
 379       fputs (_("\
 380   -A, --address-radix=RADIX   output format for file offsets; RADIX is one\n\
 381                                 of [doxn], for Decimal, Octal, Hex or None\n\
 382       --endian={big|little}   swap input bytes according the specified order\n\
 383   -j, --skip-bytes=BYTES      skip BYTES input bytes first\n\
 384 "), stdout);
 385       fputs (_("\
 386   -N, --read-bytes=BYTES      limit dump to BYTES input bytes\n\
 387   -S BYTES, --strings[=BYTES]  show only NUL terminated strings\n\
 388                                 of at least BYTES (3) printable characters\n\
 389   -t, --format=TYPE           select output format or formats\n\
 390   -v, --output-duplicates     do not use * to mark line suppression\n\
 391   -w[BYTES], --width[=BYTES]  output BYTES bytes per output line;\n\
 392                                 32 is implied when BYTES is not specified\n\
 393       --traditional           accept arguments in third form above\n\
 394 "), stdout);
 395       fputs (HELP_OPTION_DESCRIPTION, stdout);
 396       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 397       fputs (_("\
 398 \n\
 399 \n\
 400 Traditional format specifications may be intermixed; they accumulate:\n\
 401   -a   same as -t a,  select named characters, ignoring high-order bit\n\
 402   -b   same as -t o1, select octal bytes\n\
 403   -c   same as -t c,  select printable characters or backslash escapes\n\
 404   -d   same as -t u2, select unsigned decimal 2-byte units\n\
 405 "), stdout);
 406       fputs (_("\
 407   -f   same as -t fF, select floats\n\
 408   -i   same as -t dI, select decimal ints\n\
 409   -l   same as -t dL, select decimal longs\n\
 410   -o   same as -t o2, select octal 2-byte units\n\
 411   -s   same as -t d2, select decimal 2-byte units\n\
 412   -x   same as -t x2, select hexadecimal 2-byte units\n\
 413 "), stdout);
 414       fputs (_("\
 415 \n\
 416 \n\
 417 TYPE is made up of one or more of these specifications:\n\
 418   a          named character, ignoring high-order bit\n\
 419   c          printable character or backslash escape\n\
 420 "), stdout);
 421       fputs (_("\
 422   d[SIZE]    signed decimal, SIZE bytes per integer\n\
 423   f[SIZE]    floating point, SIZE bytes per float\n\
 424   o[SIZE]    octal, SIZE bytes per integer\n\
 425   u[SIZE]    unsigned decimal, SIZE bytes per integer\n\
 426   x[SIZE]    hexadecimal, SIZE bytes per integer\n\
 427 "), stdout);
 428       fputs (_("\
 429 \n\
 430 SIZE is a number.  For TYPE in [doux], SIZE may also be C for\n\
 431 sizeof(char), S for sizeof(short), I for sizeof(int) or L for\n\
 432 sizeof(long).  If TYPE is f, SIZE may also be B for Brain 16 bit,\n\
 433 H for Half precision float, F for sizeof(float), D for sizeof(double),\n\
 434 or L for sizeof(long double).\n\
 435 "), stdout);
 436       fputs (_("\
 437 \n\
 438 Adding a z suffix to any type displays printable characters at the end of\n\
 439 each output line.\n\
 440 "), stdout);
 441       fputs (_("\
 442 \n\
 443 \n\
 444 BYTES is hex with 0x or 0X prefix, and may have a multiplier suffix:\n\
 445   b    512\n\
 446   KB   1000\n\
 447   K    1024\n\
 448   MB   1000*1000\n\
 449   M    1024*1024\n\
 450 and so on for G, T, P, E, Z, Y, R, Q.\n\
 451 Binary prefixes can be used, too: KiB=K, MiB=M, and so on.\n\
 452 "), stdout);
 453       emit_ancillary_info (PROGRAM_NAME);
 454     }
 455   exit (status);
 456 }
 457
 458 /* Define the print functions.  */
 459
 460 #define PRINT_FIELDS(N, T, FMT_STRING_DECL, ACTION)                     \
 461 static void                                                             \
 462 N (size_t fields, size_t blank, void const *block,                      \
 463    FMT_STRING_DECL, int width, int pad)                                 \
 464 {                                                                       \
 465   T const *p = block;                                                   \
 466   uintmax_t i;                                                          \
 467   int pad_remaining = pad;                                              \
 468   for (i = fields; blank < i; i--)                                      \
 469     {                                                                   \
 470       int next_pad = pad * (i - 1) / fields;                            \
 471       int adjusted_width = pad_remaining - next_pad + width;            \
 472       T x;                                                              \
 473       if (input_swap && sizeof (T) > 1)                                 \
 474         {                                                               \
 475           size_t j;                                                     \
 476           union {                                                       \
 477             T x;                                                        \
 478             char b[sizeof (T)];                                         \
 479           } u;                                                          \
 480           for (j = 0; j < sizeof (T); j++)                              \
 481             u.b[j] = ((char const *) p)[sizeof (T) - 1 - j];            \
 482           x = u.x;                                                      \
 483         }                                                               \
 484       else                                                              \
 485         x = *p;                                                         \
 486       p++;                                                              \
 487       ACTION;                                                           \
 488       pad_remaining = next_pad;                                         \
 489     }                                                                   \
 490 }
 491
 492 #define PRINT_TYPE(N, T)                                                \
 493   PRINT_FIELDS (N, T, char const *fmt_string,                           \
 494                 xprintf (fmt_string, adjusted_width, x))
 495
 496 #define PRINT_FLOATTYPE(N, T, FTOASTR, BUFSIZE)                         \
 497   PRINT_FIELDS (N, T, MAYBE_UNUSED char const *fmt_string,              \
 498                 char buf[BUFSIZE];                                      \
 499                 FTOASTR (buf, sizeof buf, 0, 0, x);                     \
 500                 xprintf ("%*s", adjusted_width, buf))
 501
 502 PRINT_TYPE (print_s_char, signed char)
 503 PRINT_TYPE (print_char, unsigned char)
 504 PRINT_TYPE (print_s_short, short int)
 505 PRINT_TYPE (print_short, unsigned short int)
 506 PRINT_TYPE (print_int, unsigned int)
 507 PRINT_TYPE (print_long, unsigned long int)
 508 PRINT_TYPE (print_long_long, unsigned_long_long_int)
 509
 510 PRINT_FLOATTYPE (print_bfloat, bfloat16, ftoastr, FLT_BUFSIZE_BOUND)
 511 PRINT_FLOATTYPE (print_halffloat, float16, ftoastr, FLT_BUFSIZE_BOUND)
 512 PRINT_FLOATTYPE (print_float, float, ftoastr, FLT_BUFSIZE_BOUND)
 513 PRINT_FLOATTYPE (print_double, double, dtoastr, DBL_BUFSIZE_BOUND)
 514 PRINT_FLOATTYPE (print_long_double, long double, ldtoastr, LDBL_BUFSIZE_BOUND)
 515
 516 #undef PRINT_TYPE
 517 #undef PRINT_FLOATTYPE
 518
 519 static void
 520 dump_hexl_mode_trailer (size_t n_bytes, char const *block)
 521 {
 522   fputs ("  >", stdout);
 523   for (size_t i = n_bytes; i > 0; i--)
 524     {
 525       unsigned char c = *block++;
 526       unsigned char c2 = (isprint (c) ? c : '.');
 527       putchar (c2);
 528     }
 529   putchar ('<');
 530 }
 531
 532 static void
 533 print_named_ascii (size_t fields, size_t blank, void const *block,
 534                    MAYBE_UNUSED char const *unused_fmt_string,
 535                    int width, int pad)
 536 {
 537   unsigned char const *p = block;
 538   uintmax_t i;
 539   int pad_remaining = pad;
 540   for (i = fields; blank < i; i--)
 541     {
 542       int next_pad = pad * (i - 1) / fields;
 543       int masked_c = *p++ & 0x7f;
 544       char const *s;
 545       char buf[2];
 546
 547       if (masked_c == 127)
 548         s = "del";
 549       else if (masked_c <= 040)
 550         s = charname[masked_c];
 551       else
 552         {
 553           buf[0] = masked_c;
 554           buf[1] = 0;
 555           s = buf;
 556         }
 557
 558       xprintf ("%*s", pad_remaining - next_pad + width, s);
 559       pad_remaining = next_pad;
 560     }
 561 }
 562
 563 static void
 564 print_ascii (size_t fields, size_t blank, void const *block,
 565              MAYBE_UNUSED char const *unused_fmt_string, int width,
 566              int pad)
 567 {
 568   unsigned char const *p = block;
 569   uintmax_t i;
 570   int pad_remaining = pad;
 571   for (i = fields; blank < i; i--)
 572     {
 573       int next_pad = pad * (i - 1) / fields;
 574       unsigned char c = *p++;
 575       char const *s;
 576       char buf[4];
 577
 578       switch (c)
 579         {
 580         case '\0':
 581           s = "\\0";
 582           break;
 583
 584         case '\a':
 585           s = "\\a";
 586           break;
 587
 588         case '\b':
 589           s = "\\b";
 590           break;
 591
 592         case '\f':
 593           s = "\\f";
 594           break;
 595
 596         case '\n':
 597           s = "\\n";
 598           break;
 599
 600         case '\r':
 601           s = "\\r";
 602           break;
 603
 604         case '\t':
 605           s = "\\t";
 606           break;
 607
 608         case '\v':
 609           s = "\\v";
 610           break;
 611
 612         default:
 613           sprintf (buf, (isprint (c) ? "%c" : "%03o"), c);
 614           s = buf;
 615         }
 616
 617       xprintf ("%*s", pad_remaining - next_pad + width, s);
 618       pad_remaining = next_pad;
 619     }
 620 }
 621
 622 /* Convert a null-terminated (possibly zero-length) string S to an
 623    int value.  If S points to a non-digit set *P to S,
 624    *VAL to 0, and return true.  Otherwise, accumulate the integer value of
 625    the string of digits.  If the string of digits represents a value
 626    larger than INT_MAX, don't modify *VAL or *P and return false.
 627    Otherwise, advance *P to the first non-digit after S, set *VAL to
 628    the result of the conversion and return true.  */
 629
 630 static bool
 631 simple_strtoi (char const *s, char const **p, int *val)
 632 {
 633   int sum;
 634
 635   for (sum = 0; ISDIGIT (*s); s++)
 636     if (ckd_mul (&sum, sum, 10) || ckd_add (&sum, sum, *s - '0'))
 637       return false;
 638   *p = s;
 639   *val = sum;
 640   return true;
 641 }
 642
 643 /* If S points to a single valid modern od format string, put
 644    a description of that format in *TSPEC, make *NEXT point at the
 645    character following the just-decoded format (if *NEXT is non-null),
 646    and return true.  If S is not valid, don't modify *NEXT or *TSPEC,
 647    give a diagnostic, and return false.  For example, if S were
 648    "d4afL" *NEXT would be set to "afL" and *TSPEC would be
 649      {
 650        fmt = SIGNED_DECIMAL;
 651        size = INT or LONG; (whichever integral_type_size[4] resolves to)
 652        print_function = print_int; (assuming size == INT)
 653        field_width = 11;
 654        fmt_string = "%*d";
 655       }
 656    pad_width is determined later, but is at least as large as the
 657    number of fields printed per row.
 658    S_ORIG is solely for reporting errors.  It should be the full format
 659    string argument.
 660    */
 661
 662 static bool ATTRIBUTE_NONNULL ()
 663 decode_one_format (char const *s_orig, char const *s, char const **next,
 664                    struct tspec *tspec)
 665 {
 666   enum size_spec size_spec;
 667   int size;
 668   enum output_format fmt;
 669   void (*print_function) (size_t, size_t, void const *, char const *,
 670                           int, int);
 671   char const *p;
 672   char c;
 673   int field_width;
 674
 675   switch (*s)
 676     {
 677     case 'd':
 678     case 'o':
 679     case 'u':
 680     case 'x':
 681       c = *s;
 682       ++s;
 683       switch (*s)
 684         {
 685         case 'C':
 686           ++s;
 687           size = sizeof (char);
 688           break;
 689
 690         case 'S':
 691           ++s;
 692           size = sizeof (short int);
 693           break;
 694
 695         case 'I':
 696           ++s;
 697           size = sizeof (int);
 698           break;
 699
 700         case 'L':
 701           ++s;
 702           size = sizeof (long int);
 703           break;
 704
 705         default:
 706           if (! simple_strtoi (s, &p, &size))
 707             {
 708               /* The integer at P in S would overflow an int.
 709                  A digit string that long is sufficiently odd looking
 710                  that the following diagnostic is sufficient.  */
 711               error (0, 0, _("invalid type string %s"), quote (s_orig));
 712               return false;
 713             }
 714           if (p == s)
 715             size = sizeof (int);
 716           else
 717             {
 718               if (MAX_INTEGRAL_TYPE_SIZE < size
 719                   || integral_type_size[size] == NO_SIZE)
 720                 {
 721                   error (0, 0, _("invalid type string %s;\nthis system"
 722                                  " doesn't provide a %d-byte integral type"),
 723                          quote (s_orig), size);
 724                   return false;
 725                 }
 726               s = p;
 727             }
 728           break;
 729         }
 730
 731 #define ISPEC_TO_FORMAT(Spec, Min_format, Long_format, Max_format)      \
 732   ((Spec) == LONG_LONG ? (Max_format)                                   \
 733    : ((Spec) == LONG ? (Long_format)                                    \
 734       : (Min_format)))                                                  \
 735
 736       size_spec = integral_type_size[size];
 737
 738       switch (c)
 739         {
 740         case 'd':
 741           fmt = SIGNED_DECIMAL;
 742           field_width = bytes_to_signed_dec_digits[size];
 743           sprintf (tspec->fmt_string, "%%*%s",
 744                    ISPEC_TO_FORMAT (size_spec, "d", "ld", "jd"));
 745           break;
 746
 747         case 'o':
 748           fmt = OCTAL;
 749           sprintf (tspec->fmt_string, "%%*.%d%s",
 750                    (field_width = bytes_to_oct_digits[size]),
 751                    ISPEC_TO_FORMAT (size_spec, "o", "lo", "jo"));
 752           break;
 753
 754         case 'u':
 755           fmt = UNSIGNED_DECIMAL;
 756           field_width = bytes_to_unsigned_dec_digits[size];
 757           sprintf (tspec->fmt_string, "%%*%s",
 758                    ISPEC_TO_FORMAT (size_spec, "u", "lu", "ju"));
 759           break;
 760
 761         case 'x':
 762           fmt = HEXADECIMAL;
 763           sprintf (tspec->fmt_string, "%%*.%d%s",
 764                    (field_width = bytes_to_hex_digits[size]),
 765                    ISPEC_TO_FORMAT (size_spec, "x", "lx", "jx"));
 766           break;
 767
 768         default:
 769           unreachable ();
 770         }
 771
 772       switch (size_spec)
 773         {
 774         case CHAR:
 775           print_function = (fmt == SIGNED_DECIMAL
 776                             ? print_s_char
 777                             : print_char);
 778           break;
 779
 780         case SHORT:
 781           print_function = (fmt == SIGNED_DECIMAL
 782                             ? print_s_short
 783                             : print_short);
 784           break;
 785
 786         case INT:
 787           print_function = print_int;
 788           break;
 789
 790         case LONG:
 791           print_function = print_long;
 792           break;
 793
 794         case LONG_LONG:
 795           print_function = print_long_long;
 796           break;
 797
 798         default:
 799           affirm (false);
 800         }
 801       break;
 802
 803     case 'f':
 804       fmt = FLOATING_POINT;
 805       ++s;
 806       switch (*s)
 807         {
 808         case 'B':
 809           ++s;
 810           fmt = BFLOATING_POINT;
 811           size = sizeof (bfloat16);
 812           break;
 813
 814         case 'H':
 815           ++s;
 816           fmt = HFLOATING_POINT;
 817           size = sizeof (float16);
 818           break;
 819
 820         case 'F':
 821           ++s;
 822           size = sizeof (float);
 823           break;
 824
 825         case 'D':
 826           ++s;
 827           size = sizeof (double);
 828           break;
 829
 830         case 'L':
 831           ++s;
 832           size = sizeof (long double);
 833           break;
 834
 835         default:
 836           if (! simple_strtoi (s, &p, &size))
 837             {
 838               /* The integer at P in S would overflow an int.
 839                  A digit string that long is sufficiently odd looking
 840                  that the following diagnostic is sufficient.  */
 841               error (0, 0, _("invalid type string %s"), quote (s_orig));
 842               return false;
 843             }
 844           if (p == s)
 845             size = sizeof (double);
 846           else
 847             {
 848               if (size > MAX_FP_TYPE_SIZE
 849                   || fp_type_size[size] == NO_SIZE
 850                   || (! FLOAT16_SUPPORTED && BF16_SUPPORTED
 851                       && size == sizeof (bfloat16))
 852                   )
 853                 {
 854                   error (0, 0,
 855                          _("invalid type string %s;\n"
 856                            "this system doesn't provide a %d-byte"
 857                            " floating point type"),
 858                          quote (s_orig), size);
 859                   return false;
 860                 }
 861               s = p;
 862             }
 863           break;
 864         }
 865       size_spec = fp_type_size[size];
 866
 867       if ((! FLOAT16_SUPPORTED && fmt == HFLOATING_POINT)
 868           || (! BF16_SUPPORTED && fmt == BFLOATING_POINT))
 869       {
 870         error (0, 0,
 871                _("this system doesn't provide a %s floating point type"),
 872                quote (s_orig));
 873         return false;
 874       }
 875
 876       {
 877         struct lconv const *locale = localeconv ();
 878         size_t decimal_point_len =
 879           (locale->decimal_point[0] ? strlen (locale->decimal_point) : 1);
 880
 881         switch (size_spec)
 882           {
 883           case FLOAT_HALF:
 884             print_function = fmt == BFLOATING_POINT
 885                              ? print_bfloat : print_halffloat;
 886             field_width = FLT_STRLEN_BOUND_L (decimal_point_len);
 887             break;
 888
 889           case FLOAT_SINGLE:
 890             print_function = print_float;
 891             field_width = FLT_STRLEN_BOUND_L (decimal_point_len);
 892             break;
 893
 894           case FLOAT_DOUBLE:
 895             print_function = print_double;
 896             field_width = DBL_STRLEN_BOUND_L (decimal_point_len);
 897             break;
 898
 899           case FLOAT_LONG_DOUBLE:
 900             print_function = print_long_double;
 901             field_width = LDBL_STRLEN_BOUND_L (decimal_point_len);
 902             break;
 903
 904           default:
 905             affirm (false);
 906           }
 907
 908         break;
 909       }
 910
 911     case 'a':
 912       ++s;
 913       fmt = NAMED_CHARACTER;
 914       size_spec = CHAR;
 915       print_function = print_named_ascii;
 916       field_width = 3;
 917       break;
 918
 919     case 'c':
 920       ++s;
 921       fmt = CHARACTER;
 922       size_spec = CHAR;
 923       print_function = print_ascii;
 924       field_width = 3;
 925       break;
 926
 927     default:
 928       error (0, 0, _("invalid character '%c' in type string %s"),
 929              *s, quote (s_orig));
 930       return false;
 931     }
 932
 933   tspec->size = size_spec;
 934   tspec->fmt = fmt;
 935   tspec->print_function = print_function;
 936
 937   tspec->field_width = field_width;
 938   tspec->hexl_mode_trailer = (*s == 'z');
 939   if (tspec->hexl_mode_trailer)
 940     s++;
 941
 942   *next = s;
 943   return true;
 944 }
 945
 946 /* Given a list of one or more input filenames FILE_LIST, set the global
 947    file pointer IN_STREAM and the global string INPUT_FILENAME to the
 948    first one that can be successfully opened. Modify FILE_LIST to
 949    reference the next filename in the list.  A file name of "-" is
 950    interpreted as standard input.  If any file open fails, give an error
 951    message and return false.  */
 952
 953 static bool
 954 open_next_file (void)
 955 {
 956   bool ok = true;
 957
 958   do
 959     {
 960       input_filename = *file_list;
 961       if (input_filename == nullptr)
 962         return ok;
 963       ++file_list;
 964
 965       if (STREQ (input_filename, "-"))
 966         {
 967           input_filename = _("standard input");
 968           in_stream = stdin;
 969           have_read_stdin = true;
 970           xset_binary_mode (STDIN_FILENO, O_BINARY);
 971         }
 972       else
 973         {
 974           in_stream = fopen (input_filename, (O_BINARY ? "rb" : "r"));
 975           if (in_stream == nullptr)
 976             {
 977               error (0, errno, "%s", quotef (input_filename));
 978               ok = false;
 979             }
 980         }
 981     }
 982   while (in_stream == nullptr);
 983
 984   if (limit_bytes_to_format && !flag_dump_strings)
 985     setvbuf (in_stream, nullptr, _IONBF, 0);
 986
 987   return ok;
 988 }
 989
 990 /* Test whether there have been errors on in_stream, and close it if
 991    it is not standard input.  Return false if there has been an error
 992    on in_stream or stdout; return true otherwise.  This function will
 993    report more than one error only if both a read and a write error
 994    have occurred.  IN_ERRNO, if nonzero, is the error number
 995    corresponding to the most recent action for IN_STREAM.  */
 996
 997 static bool
 998 check_and_close (int in_errno)
 999 {
1000   bool ok = true;
1001
1002   if (in_stream != nullptr)
1003     {
1004       if (!ferror (in_stream))
1005         in_errno = 0;
1006       if (STREQ (file_list[-1], "-"))
1007         clearerr (in_stream);
1008       else if (fclose (in_stream) != 0 && !in_errno)
1009         in_errno = errno;
1010       if (in_errno)
1011         {
1012           error (0, in_errno, "%s", quotef (input_filename));
1013           ok = false;
1014         }
1015
1016       in_stream = nullptr;
1017     }
1018
1019   if (ferror (stdout))
1020     {
1021       error (0, 0, _("write error"));
1022       ok = false;
1023     }
1024
1025   return ok;
1026 }
1027
1028 /* Decode the modern od format string S.  Append the decoded
1029    representation to the global array SPEC, reallocating SPEC if
1030    necessary.  Return true if S is valid.  */
1031
1032 static bool ATTRIBUTE_NONNULL ()
1033 decode_format_string (char const *s)
1034 {
1035   char const *s_orig = s;
1036
1037   while (*s != '\0')
1038     {
1039       char const *next;
1040
1041       if (n_specs_allocated <= n_specs)
1042         spec = xpalloc (spec, &n_specs_allocated, 1, -1, sizeof *spec);
1043
1044       if (! decode_one_format (s_orig, s, &next, &spec[n_specs]))
1045         return false;
1046
1047       affirm (s != next);
1048       s = next;
1049       ++n_specs;
1050     }
1051
1052   return true;
1053 }
1054
/* Given a list of one or more input filenames FILE_LIST, set the global
   file pointer IN_STREAM to position N_SKIP in the concatenation of
   those files.  When possible, use seek rather than read operations to
   advance IN_STREAM.

   If a file operation fails, give an error message and return false
   (after still trying to position the input).  If there are fewer than
   N_SKIP bytes in the combined input, this is diagnosed through
   error (EXIT_FAILURE, ...) rather than by returning false.
   Return true if N_SKIP is zero or the skip completed without error.  */

static bool
skip (uintmax_t n_skip)
{
  bool ok = true;
  /* errno of a failed seek/read on the current stream, handed to
     check_and_close when the stream is finished with.  */
  int in_errno = 0;

  if (n_skip == 0)
    return true;

  while (in_stream != nullptr)  /* EOF.  */
    {
      struct stat file_stats;

      /* First try seeking.  For large offsets, this extra work is
         worthwhile.  If the offset is below some threshold it may be
         more efficient to move the pointer by reading.  There are two
         issues when trying to seek:
           - the file must be seekable.
           - before seeking to the specified position, make sure
             that the new position is in the current file.
             Try to do that by getting file's size using fstat.
             But that will work only for regular files.  */

      if (fstat (fileno (in_stream), &file_stats) == 0)
        {
          bool usable_size = usable_st_size (&file_stats);

          /* The st_size field is valid for regular files.
             If the number of bytes left to skip is larger than
             the size of the current file, we can decrement n_skip
             and go on to the next file.  Skip this optimization also
             when st_size is no greater than the block size, because
             some kernels report nonsense small file sizes for
             proc-like file systems.  */
          if (usable_size && STP_BLKSIZE (&file_stats) < file_stats.st_size)
            {
              if ((uintmax_t) file_stats.st_size < n_skip)
                /* Whole file is skipped; fall through to close it and
                   move on to the next one.  */
                n_skip -= file_stats.st_size;
              else
                {
                  if (fseeko (in_stream, n_skip, SEEK_CUR) != 0)
                    {
                      in_errno = errno;
                      ok = false;
                    }
                  /* Stop skipping whether or not the seek worked; a
                     failure is reported via OK.  */
                  n_skip = 0;
                }
            }

          /* Size is not usable: just attempt the seek.  If it fails,
             the final 'else' below falls back to reading.  */
          else if (!usable_size && fseeko (in_stream, n_skip, SEEK_CUR) == 0)
            n_skip = 0;

          /* If it's not a regular file with nonnegative size,
             or if it's so small that it might be in a proc-like file system,
             position the file pointer by reading.  */

          else
            {
              char buf[BUFSIZ];
              size_t n_bytes_read, n_bytes_to_read = BUFSIZ;

              while (0 < n_skip)
                {
                  /* Never read past the requested skip distance.  */
                  if (n_skip < n_bytes_to_read)
                    n_bytes_to_read = n_skip;
                  n_bytes_read = fread (buf, 1, n_bytes_to_read, in_stream);
                  n_skip -= n_bytes_read;
                  if (n_bytes_read != n_bytes_to_read)
                    {
                      if (ferror (in_stream))
                        {
                          /* Read error: give up skipping entirely.  */
                          in_errno = errno;
                          ok = false;
                          n_skip = 0;
                          break;
                        }
                      /* End of this file: continue with the next one,
                         keeping the remaining N_SKIP.  */
                      if (feof (in_stream))
                        break;
                    }
                }
            }

          /* Done skipping: leave IN_STREAM open at the new position.  */
          if (n_skip == 0)
            break;
        }

      else   /* cannot fstat() file */
        {
          error (0, errno, "%s", quotef (input_filename));
          ok = false;
        }

      /* The current file is exhausted (or unusable); close it and
         advance to the next input.  */
      ok &= check_and_close (in_errno);

      ok &= open_next_file ();
    }

  /* Ran out of input files before skipping everything requested.  */
  if (n_skip != 0)
    error (EXIT_FAILURE, 0, _("cannot skip past end of combined input"));

  return ok;
}
1164
1165 static void
1166 format_address_none (MAYBE_UNUSED uintmax_t address,
1167                      MAYBE_UNUSED char c)
1168 {
1169 }
1170
1171 static void
1172 format_address_std (uintmax_t address, char c)
1173 {
1174   char buf[MAX_ADDRESS_LENGTH + 2];
1175   char *p = buf + sizeof buf;
1176   char const *pbound;
1177
1178   *--p = '\0';
1179   *--p = c;
1180   pbound = p - address_pad_len;
1181
1182   /* Use a special case of the code for each base.  This is measurably
1183      faster than generic code.  */
1184   switch (address_base)
1185     {
1186     case 8:
1187       do
1188         *--p = '0' + (address & 7);
1189       while ((address >>= 3) != 0);
1190       break;
1191
1192     case 10:
1193       do
1194         *--p = '0' + (address % 10);
1195       while ((address /= 10) != 0);
1196       break;
1197
1198     case 16:
1199       do
1200         *--p = "0123456789abcdef"[address & 15];
1201       while ((address >>= 4) != 0);
1202       break;
1203     }
1204
1205   while (pbound < p)
1206     *--p = '0';
1207
1208   fputs (p, stdout);
1209 }
1210
/* Write ADDRESS enclosed in parentheses, then the character C unless
   C is the null character.  */

static void
format_address_paren (uintmax_t address, char c)
{
  putchar ('(');
  format_address_std (address, ')');

  if (c != '\0')
    putchar (c);
}
1219
1220 static void
1221 format_address_label (uintmax_t address, char c)
1222 {
1223   format_address_std (address, ' ');
1224   format_address_paren (address + pseudo_offset, c);
1225 }
1226
1227 /* Write N_BYTES bytes from CURR_BLOCK to standard output once for each
1228    of the N_SPEC format specs.  CURRENT_OFFSET is the byte address of
1229    CURR_BLOCK in the concatenation of input files, and it is printed
1230    (optionally) only before the output line associated with the first
1231    format spec.  When duplicate blocks are being abbreviated, the output
1232    for a sequence of identical input blocks is the output for the first
1233    block followed by an asterisk alone on a line.  It is valid to compare
1234    the blocks PREV_BLOCK and CURR_BLOCK only when N_BYTES == BYTES_PER_BLOCK.
1235    That condition may be false only for the last input block.  */
1236
1237 static void
1238 write_block (uintmax_t current_offset, size_t n_bytes,
1239              char const *prev_block, char const *curr_block)
1240 {
1241   static bool first = true;
1242   static bool prev_pair_equal = false;
1243
1244 #define EQUAL_BLOCKS(b1, b2) (memcmp (b1, b2, bytes_per_block) == 0)
1245
1246   if (abbreviate_duplicate_blocks
1247       && !first && n_bytes == bytes_per_block
1248       && EQUAL_BLOCKS (prev_block, curr_block))
1249     {
1250       if (prev_pair_equal)
1251         {
1252           /* The two preceding blocks were equal, and the current
1253              block is the same as the last one, so print nothing.  */
1254         }
1255       else
1256         {
1257           printf ("*\n");
1258           prev_pair_equal = true;
1259         }
1260     }
1261   else
1262     {
1263       prev_pair_equal = false;
1264       for (idx_t i = 0; i < n_specs; i++)
1265         {
1266           int datum_width = width_bytes[spec[i].size];
1267           int fields_per_block = bytes_per_block / datum_width;
1268           int blank_fields = (bytes_per_block - n_bytes) / datum_width;
1269           if (i == 0)
1270             format_address (current_offset, '\0');
1271           else
1272             printf ("%*s", address_pad_len, "");
1273           (*spec[i].print_function) (fields_per_block, blank_fields,
1274                                      curr_block, spec[i].fmt_string,
1275                                      spec[i].field_width, spec[i].pad_width);
1276           if (spec[i].hexl_mode_trailer)
1277             {
1278               /* space-pad out to full line width, then dump the trailer */
1279               int field_width = spec[i].field_width;
1280               int pad_width = (spec[i].pad_width * blank_fields
1281                                / fields_per_block);
1282               printf ("%*s", blank_fields * field_width + pad_width, "");
1283               dump_hexl_mode_trailer (n_bytes, curr_block);
1284             }
1285           putchar ('\n');
1286         }
1287     }
1288   first = false;
1289 }
1290
1291 /* Read a single byte into *C from the concatenation of the input files
1292    named in the global array FILE_LIST.  On the first call to this
1293    function, the global variable IN_STREAM is expected to be an open
1294    stream associated with the input file INPUT_FILENAME.  If IN_STREAM
1295    is at end-of-file, close it and update the global variables IN_STREAM
1296    and INPUT_FILENAME so they correspond to the next file in the list.
1297    Then try to read a byte from the newly opened file.  Repeat if
1298    necessary until EOF is reached for the last file in FILE_LIST, then
1299    set *C to EOF and return.  Subsequent calls do likewise.  Return
1300    true if successful.  */
1301
1302 static bool
1303 read_char (int *c)
1304 {
1305   bool ok = true;
1306
1307   *c = EOF;
1308
1309   while (in_stream != nullptr)  /* EOF.  */
1310     {
1311       *c = fgetc (in_stream);
1312
1313       if (*c != EOF)
1314         break;
1315
1316       ok &= check_and_close (errno);
1317
1318       ok &= open_next_file ();
1319     }
1320
1321   return ok;
1322 }
1323
1324 /* Read N bytes into BLOCK from the concatenation of the input files
1325    named in the global array FILE_LIST.  On the first call to this
1326    function, the global variable IN_STREAM is expected to be an open
1327    stream associated with the input file INPUT_FILENAME.  If all N
1328    bytes cannot be read from IN_STREAM, close IN_STREAM and update
1329    the global variables IN_STREAM and INPUT_FILENAME.  Then try to
1330    read the remaining bytes from the newly opened file.  Repeat if
1331    necessary until EOF is reached for the last file in FILE_LIST.
1332    On subsequent calls, don't modify BLOCK and return true.  Set
1333    *N_BYTES_IN_BUFFER to the number of bytes read.  If an error occurs,
1334    it will be detected through ferror when the stream is about to be
1335    closed.  If there is an error, give a message but continue reading
1336    as usual and return false.  Otherwise return true.  */
1337
1338 static bool
1339 read_block (size_t n, char *block, size_t *n_bytes_in_buffer)
1340 {
1341   bool ok = true;
1342
1343   affirm (0 < n && n <= bytes_per_block);
1344
1345   *n_bytes_in_buffer = 0;
1346
1347   while (in_stream != nullptr)  /* EOF.  */
1348     {
1349       size_t n_needed;
1350       size_t n_read;
1351
1352       n_needed = n - *n_bytes_in_buffer;
1353       n_read = fread (block + *n_bytes_in_buffer, 1, n_needed, in_stream);
1354
1355       *n_bytes_in_buffer += n_read;
1356
1357       if (n_read == n_needed)
1358         break;
1359
1360       ok &= check_and_close (errno);
1361
1362       ok &= open_next_file ();
1363     }
1364
1365   return ok;
1366 }
1367
1368 /* Return the least common multiple of the sizes associated
1369    with the format specs.  */
1370
1371 ATTRIBUTE_PURE
1372 static int
1373 get_lcm (void)
1374 {
1375   int l_c_m = 1;
1376
1377   for (idx_t i = 0; i < n_specs; i++)
1378     l_c_m = lcm (l_c_m, width_bytes[spec[i].size]);
1379   return l_c_m;
1380 }
1381
1382 /* If S is a valid traditional offset specification with an optional
1383    leading '+' return true and set *OFFSET to the offset it denotes.  */
1384
1385 static bool
1386 parse_old_offset (char const *s, uintmax_t *offset)
1387 {
1388   int radix;
1389
1390   if (*s == '\0')
1391     return false;
1392
1393   /* Skip over any leading '+'. */
1394   if (s[0] == '+')
1395     ++s;
1396
1397   /* Determine the radix we'll use to interpret S.  If there is a '.',
1398      it's decimal, otherwise, if the string begins with '0X'or '0x',
1399      it's hexadecimal, else octal.  */
1400   if (strchr (s, '.') != nullptr)
1401     radix = 10;
1402   else
1403     {
1404       if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
1405         radix = 16;
1406       else
1407         radix = 8;
1408     }
1409
1410   return xstrtoumax (s, nullptr, radix, offset, "Bb") == LONGINT_OK;
1411 }
1412
1413 /* Read a chunk of size BYTES_PER_BLOCK from the input files, write the
1414    formatted block to standard output, and repeat until the specified
1415    maximum number of bytes has been read or until all input has been
1416    processed.  If the last block read is smaller than BYTES_PER_BLOCK
1417    and its size is not a multiple of the size associated with a format
1418    spec, extend the input block with zero bytes until its length is a
1419    multiple of all format spec sizes.  Write the final block.  Finally,
1420    write on a line by itself the offset of the byte after the last byte
1421    read.  Accumulate return values from calls to read_block and
1422    check_and_close, and if any was false, return false.
1423    Otherwise, return true.  */
1424
1425 static bool
1426 dump (void)
1427 {
1428   char *block[2];
1429   uintmax_t current_offset;
1430   bool idx = false;
1431   bool ok = true;
1432   size_t n_bytes_read;
1433
1434   block[0] = xnmalloc (2, bytes_per_block);
1435   block[1] = block[0] + bytes_per_block;
1436
1437   current_offset = n_bytes_to_skip;
1438
1439   if (limit_bytes_to_format)
1440     {
1441       while (ok)
1442         {
1443           size_t n_needed;
1444           if (current_offset >= end_offset)
1445             {
1446               n_bytes_read = 0;
1447               break;
1448             }
1449           n_needed = MIN (end_offset - current_offset,
1450                           (uintmax_t) bytes_per_block);
1451           ok &= read_block (n_needed, block[idx], &n_bytes_read);
1452           if (n_bytes_read < bytes_per_block)
1453             break;
1454           affirm (n_bytes_read == bytes_per_block);
1455           write_block (current_offset, n_bytes_read,
1456                        block[!idx], block[idx]);
1457           if (ferror (stdout))
1458             ok = false;
1459           current_offset += n_bytes_read;
1460           idx = !idx;
1461         }
1462     }
1463   else
1464     {
1465       while (ok)
1466         {
1467           ok &= read_block (bytes_per_block, block[idx], &n_bytes_read);
1468           if (n_bytes_read < bytes_per_block)
1469             break;
1470           affirm (n_bytes_read == bytes_per_block);
1471           write_block (current_offset, n_bytes_read,
1472                        block[!idx], block[idx]);
1473           if (ferror (stdout))
1474             ok = false;
1475           current_offset += n_bytes_read;
1476           idx = !idx;
1477         }
1478     }
1479
1480   if (n_bytes_read > 0)
1481     {
1482       int l_c_m;
1483       size_t bytes_to_write;
1484
1485       l_c_m = get_lcm ();
1486
1487       /* Ensure zero-byte padding up to the smallest multiple of l_c_m that
1488          is at least as large as n_bytes_read.  */
1489       bytes_to_write = l_c_m * ((n_bytes_read + l_c_m - 1) / l_c_m);
1490
1491       memset (block[idx] + n_bytes_read, 0, bytes_to_write - n_bytes_read);
1492       write_block (current_offset, n_bytes_read, block[!idx], block[idx]);
1493       current_offset += n_bytes_read;
1494     }
1495
1496   format_address (current_offset, '\n');
1497
1498   if (limit_bytes_to_format && current_offset >= end_offset)
1499     ok &= check_and_close (0);
1500
1501   free (block[0]);
1502
1503   return ok;
1504 }
1505
1506 /* STRINGS mode.  Find each "string constant" in the input.
1507    A string constant is a run of at least 'string_min' ASCII
1508    graphic (or formatting) characters terminated by a null.
1509    Based on a function written by Richard Stallman for a
1510    traditional version of od.  Return true if successful.  */
1511
1512 static bool
1513 dump_strings (void)
1514 {
1515   idx_t bufsize = MAX (100, string_min);
1516   char *buf = xmalloc (bufsize);
1517   uintmax_t address = n_bytes_to_skip;
1518   bool ok = true;
1519
1520   while (true)
1521     {
1522       idx_t i;
1523       int c;
1524
1525       /* See if the next 'string_min' chars are all printing chars.  */
1526     tryline:
1527
1528       if (limit_bytes_to_format
1529           && (end_offset < string_min || end_offset - string_min <= address))
1530         break;
1531
1532       for (i = 0; i < string_min; i++)
1533         {
1534           ok &= read_char (&c);
1535           address++;
1536           if (c < 0)
1537             {
1538               free (buf);
1539               return ok;
1540             }
1541           if (! isprint (c))
1542             /* Found a non-printing.  Try again starting with next char.  */
1543             goto tryline;
1544           buf[i] = c;
1545         }
1546
1547       /* We found a run of 'string_min' printable characters.
1548          Now see if it is terminated with a null byte.  */
1549       while (!limit_bytes_to_format || address < end_offset)
1550         {
1551           if (i == bufsize)
1552             buf = xpalloc (buf, &bufsize, 1, -1, sizeof *buf);
1553           ok &= read_char (&c);
1554           address++;
1555           if (c < 0)
1556             {
1557               free (buf);
1558               return ok;
1559             }
1560           if (c == '\0')
1561             break;              /* It is; print this string.  */
1562           if (! isprint (c))
1563             goto tryline;       /* It isn't; give up on this string.  */
1564           buf[i++] = c;         /* String continues; store it all.  */
1565         }
1566
1567       /* If we get here, the string is all printable and null-terminated,
1568          so print it.  It is all in 'buf' and 'i' is its length.  */
1569       buf[i] = 0;
1570       format_address (address - i - 1, ' ');
1571
1572       for (i = 0; (c = buf[i]); i++)
1573         {
1574           switch (c)
1575             {
1576             case '\a':
1577               fputs ("\\a", stdout);
1578               break;
1579
1580             case '\b':
1581               fputs ("\\b", stdout);
1582               break;
1583
1584             case '\f':
1585               fputs ("\\f", stdout);
1586               break;
1587
1588             case '\n':
1589               fputs ("\\n", stdout);
1590               break;
1591
1592             case '\r':
1593               fputs ("\\r", stdout);
1594               break;
1595
1596             case '\t':
1597               fputs ("\\t", stdout);
1598               break;
1599
1600             case '\v':
1601               fputs ("\\v", stdout);
1602               break;
1603
1604             default:
1605               putc (c, stdout);
1606             }
1607         }
1608       putchar ('\n');
1609     }
1610
1611   /* We reach this point only if we search through
1612      (max_bytes_to_format - string_min) bytes before reaching EOF.  */
1613
1614   free (buf);
1615
1616   ok &= check_and_close (0);
1617   return ok;
1618 }
1619
1620 int
1621 main (int argc, char **argv)
1622 {
1623   int n_files;
1624   int l_c_m;
1625   idx_t desired_width IF_LINT ( = 0);
1626   bool modern = false;
1627   bool width_specified = false;
1628   bool ok = true;
1629   size_t width_per_block = 0;
1630   static char const multipliers[] = "bEGKkMmPQRTYZ0";
1631
1632   /* The old-style 'pseudo starting address' to be printed in parentheses
1633      after any true address.  */
1634   uintmax_t pseudo_start IF_LINT ( = 0);
1635
1636   initialize_main (&argc, &argv);
1637   set_program_name (argv[0]);
1638   setlocale (LC_ALL, "");
1639   bindtextdomain (PACKAGE, LOCALEDIR);
1640   textdomain (PACKAGE);
1641
1642   atexit (close_stdout);
1643
1644   for (idx_t i = 0; i <= MAX_INTEGRAL_TYPE_SIZE; i++)
1645     integral_type_size[i] = NO_SIZE;
1646
1647   integral_type_size[sizeof (char)] = CHAR;
1648   integral_type_size[sizeof (short int)] = SHORT;
1649   integral_type_size[sizeof (int)] = INT;
1650   integral_type_size[sizeof (long int)] = LONG;
1651 #if HAVE_UNSIGNED_LONG_LONG_INT
1652   /* If 'long int' and 'long long int' have the same size, it's fine
1653      to overwrite the entry for 'long' with this one.  */
1654   integral_type_size[sizeof (unsigned_long_long_int)] = LONG_LONG;
1655 #endif
1656
1657   for (idx_t i = 0; i <= MAX_FP_TYPE_SIZE; i++)
1658     fp_type_size[i] = NO_SIZE;
1659
1660 #if FLOAT16_SUPPORTED
1661   fp_type_size[sizeof (float16)] = FLOAT_HALF;
1662 #elif BF16_SUPPORTED
1663   fp_type_size[sizeof (bfloat16)] = FLOAT_HALF;
1664 #endif
1665   fp_type_size[sizeof (float)] = FLOAT_SINGLE;
1666   /* The array entry for 'double' is filled in after that for 'long double'
1667      so that if they are the same size, we avoid any overhead of
1668      long double computation in libc.  */
1669   fp_type_size[sizeof (long double)] = FLOAT_LONG_DOUBLE;
1670   fp_type_size[sizeof (double)] = FLOAT_DOUBLE;
1671
1672   n_specs = 0;
1673   n_specs_allocated = 0;
1674   spec = nullptr;
1675
1676   format_address = format_address_std;
1677   address_base = 8;
1678   address_pad_len = 7;
1679   flag_dump_strings = false;
1680
1681   while (true)
1682     {
1683       uintmax_t tmp;
1684       enum strtol_error s_err;
1685       int oi = -1;
1686       int c = getopt_long (argc, argv, short_options, long_options, &oi);
1687       if (c == -1)
1688         break;
1689
1690       switch (c)
1691         {
1692         case 'A':
1693           modern = true;
1694           switch (optarg[0])
1695             {
1696             case 'd':
1697               format_address = format_address_std;
1698               address_base = 10;
1699               address_pad_len = 7;
1700               break;
1701             case 'o':
1702               format_address = format_address_std;
1703               address_base = 8;
1704               address_pad_len = 7;
1705               break;
1706             case 'x':
1707               format_address = format_address_std;
1708               address_base = 16;
1709               address_pad_len = 6;
1710               break;
1711             case 'n':
1712               format_address = format_address_none;
1713               address_pad_len = 0;
1714               break;
1715             default:
1716               error (EXIT_FAILURE, 0,
1717                      _("invalid output address radix '%c';"
1718                        " it must be one character from [doxn]"),
1719                      optarg[0]);
1720               break;
1721             }
1722           break;
1723
1724         case 'j':
1725           modern = true;
1726           s_err = xstrtoumax (optarg, nullptr, 0,
1727                               &n_bytes_to_skip, multipliers);
1728           if (s_err != LONGINT_OK)
1729             xstrtol_fatal (s_err, oi, c, long_options, optarg);
1730           break;
1731
1732         case 'N':
1733           modern = true;
1734           limit_bytes_to_format = true;
1735
1736           s_err = xstrtoumax (optarg, nullptr, 0, &max_bytes_to_format,
1737                               multipliers);
1738           if (s_err != LONGINT_OK)
1739             xstrtol_fatal (s_err, oi, c, long_options, optarg);
1740           break;
1741
1742         case 'S':
1743           modern = true;
1744           if (optarg == nullptr)
1745             string_min = 3;
1746           else
1747             {
1748               s_err = xstrtoumax (optarg, nullptr, 0, &tmp, multipliers);
1749               if (s_err != LONGINT_OK)
1750                 xstrtol_fatal (s_err, oi, c, long_options, optarg);
1751
1752               /* The minimum string length may be no larger than
1753                  MIN (IDX_MAX, SIZE_MAX), since we may allocate a
1754                  buffer of this size.  */
1755               if (MIN (IDX_MAX, SIZE_MAX) < tmp)
1756                 error (EXIT_FAILURE, 0, _("%s is too large"), quote (optarg));
1757
1758               string_min = tmp;
1759             }
1760           flag_dump_strings = true;
1761           break;
1762
1763         case 't':
1764           modern = true;
1765           ok &= decode_format_string (optarg);
1766           break;
1767
1768         case 'v':
1769           modern = true;
1770           abbreviate_duplicate_blocks = false;
1771           break;
1772
1773         case TRADITIONAL_OPTION:
1774           traditional = true;
1775           break;
1776
1777         case ENDIAN_OPTION:
1778           switch (XARGMATCH ("--endian", optarg, endian_args, endian_types))
1779             {
1780               case endian_big:
1781                   input_swap = ! WORDS_BIGENDIAN;
1782                   break;
1783               case endian_little:
1784                   input_swap = WORDS_BIGENDIAN;
1785                   break;
1786             }
1787           break;
1788
1789           /* The next several cases map the traditional format
1790              specification options to the corresponding modern format
1791              specs.  GNU od accepts any combination of old- and
1792              new-style options.  Format specification options accumulate.
1793              The obsolescent and undocumented formats are compatible
1794              with FreeBSD 4.10 od.  */
1795
1796 #define CASE_OLD_ARG(old_char,new_string)               \
1797         case old_char:                                  \
1798           ok &= decode_format_string (new_string);      \
1799           break
1800
1801           CASE_OLD_ARG ('a', "a");
1802           CASE_OLD_ARG ('b', "o1");
1803           CASE_OLD_ARG ('c', "c");
1804           CASE_OLD_ARG ('D', "u4"); /* obsolescent and undocumented */
1805           CASE_OLD_ARG ('d', "u2");
1806         case 'F': /* obsolescent and undocumented alias */
1807           CASE_OLD_ARG ('e', "fD"); /* obsolescent and undocumented */
1808           CASE_OLD_ARG ('f', "fF");
1809         case 'X': /* obsolescent and undocumented alias */
1810           CASE_OLD_ARG ('H', "x4"); /* obsolescent and undocumented */
1811           CASE_OLD_ARG ('i', "dI");
1812         case 'I': case 'L': /* obsolescent and undocumented aliases */
1813           CASE_OLD_ARG ('l', "dL");
1814           CASE_OLD_ARG ('O', "o4"); /* obsolescent and undocumented */
1815         case 'B': /* obsolescent and undocumented alias */
1816           CASE_OLD_ARG ('o', "o2");
1817           CASE_OLD_ARG ('s', "d2");
1818         case 'h': /* obsolescent and undocumented alias */
1819           CASE_OLD_ARG ('x', "x2");
1820
1821 #undef CASE_OLD_ARG
1822
1823         case 'w':
1824           modern = true;
1825           width_specified = true;
1826           if (optarg == nullptr)
1827             {
1828               desired_width = 32;
1829             }
1830           else
1831             {
1832               intmax_t w_tmp;
1833               s_err = xstrtoimax (optarg, nullptr, 10, &w_tmp, "");
1834               if (s_err != LONGINT_OK || w_tmp <= 0)
1835                 xstrtol_fatal (s_err, oi, c, long_options, optarg);
1836               if (ckd_add (&desired_width, w_tmp, 0))
1837                 error (EXIT_FAILURE, 0, _("%s is too large"), quote (optarg));
1838             }
1839           break;
1840
1841         case_GETOPT_HELP_CHAR;
1842
1843         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1844
1845         default:
1846           usage (EXIT_FAILURE);
1847           break;
1848         }
1849     }
1850
1851   if (!ok)
1852     return EXIT_FAILURE;
1853
1854   if (flag_dump_strings && n_specs > 0)
1855     error (EXIT_FAILURE, 0,
1856            _("no type may be specified when dumping strings"));
1857
1858   n_files = argc - optind;
1859
1860   /* If the --traditional option is used, there may be from
1861      0 to 3 remaining command line arguments;  handle each case
1862      separately.
1863         od [file] [[+]offset[.][b] [[+]label[.][b]]]
1864      The offset and label have the same syntax.
1865
1866      If --traditional is not given, and if no modern options are
1867      given, and if the offset begins with + or (if there are two
1868      operands) a digit, accept only this form, as per POSIX:
1869         od [file] [[+]offset[.][b]]
1870   */
1871
1872   if (!modern || traditional)
1873     {
1874       uintmax_t o1;
1875       uintmax_t o2;
1876
1877       switch (n_files)
1878         {
1879         case 1:
1880           if ((traditional || argv[optind][0] == '+')
1881               && parse_old_offset (argv[optind], &o1))
1882             {
1883               n_bytes_to_skip = o1;
1884               --n_files;
1885               ++argv;
1886             }
1887           break;
1888
1889         case 2:
1890           if ((traditional || argv[optind + 1][0] == '+'
1891                || ISDIGIT (argv[optind + 1][0]))
1892               && parse_old_offset (argv[optind + 1], &o2))
1893             {
1894               if (traditional && parse_old_offset (argv[optind], &o1))
1895                 {
1896                   n_bytes_to_skip = o1;
1897                   flag_pseudo_start = true;
1898                   pseudo_start = o2;
1899                   argv += 2;
1900                   n_files -= 2;
1901                 }
1902               else
1903                 {
1904                   n_bytes_to_skip = o2;
1905                   --n_files;
1906                   argv[optind + 1] = argv[optind];
1907                   ++argv;
1908                 }
1909             }
1910           break;
1911
1912         case 3:
1913           if (traditional
1914               && parse_old_offset (argv[optind + 1], &o1)
1915               && parse_old_offset (argv[optind + 2], &o2))
1916             {
1917               n_bytes_to_skip = o1;
1918               flag_pseudo_start = true;
1919               pseudo_start = o2;
1920               argv[optind + 2] = argv[optind];
1921               argv += 2;
1922               n_files -= 2;
1923             }
1924           break;
1925         }
1926
1927       if (traditional && 1 < n_files)
1928         {
1929           error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
1930           error (0, 0, "%s",
1931                  _("compatibility mode supports at most one file"));
1932           usage (EXIT_FAILURE);
1933         }
1934     }
1935
1936   if (flag_pseudo_start)
1937     {
1938       if (format_address == format_address_none)
1939         {
1940           address_base = 8;
1941           address_pad_len = 7;
1942           format_address = format_address_paren;
1943         }
1944       else
1945         format_address = format_address_label;
1946     }
1947
1948   if (limit_bytes_to_format)
1949     {
1950       end_offset = n_bytes_to_skip + max_bytes_to_format;
1951       if (end_offset < n_bytes_to_skip)
1952         error (EXIT_FAILURE, 0, _("skip-bytes + read-bytes is too large"));
1953     }
1954
1955   if (n_specs == 0)
1956     decode_format_string ("oS");
1957
1958   if (n_files > 0)
1959     {
1960       /* Set the global pointer FILE_LIST so that it
1961          references the first file-argument on the command-line.  */
1962
1963       file_list = (char const *const *) &argv[optind];
1964     }
1965   else
1966     {
1967       /* No files were listed on the command line.
1968          Set the global pointer FILE_LIST so that it
1969          references the null-terminated list of one name: "-".  */
1970
1971       file_list = default_file_list;
1972     }
1973
1974   /* open the first input file */
1975   ok = open_next_file ();
1976   if (in_stream == nullptr)
1977     goto cleanup;
1978
1979   /* skip over any unwanted header bytes */
1980   ok &= skip (n_bytes_to_skip);
1981   if (in_stream == nullptr)
1982     goto cleanup;
1983
1984   pseudo_offset = (flag_pseudo_start ? pseudo_start - n_bytes_to_skip : 0);
1985
1986   /* Compute output block length.  */
1987   l_c_m = get_lcm ();
1988
1989   if (width_specified)
1990     {
1991       if (desired_width != 0 && desired_width % l_c_m == 0)
1992         bytes_per_block = desired_width;
1993       else
1994         {
1995           error (0, 0, _("warning: invalid width %td; using %d instead"),
1996                  desired_width, l_c_m);
1997           bytes_per_block = l_c_m;
1998         }
1999     }
2000   else
2001     {
2002       if (l_c_m < DEFAULT_BYTES_PER_BLOCK)
2003         bytes_per_block = l_c_m * (DEFAULT_BYTES_PER_BLOCK / l_c_m);
2004       else
2005         bytes_per_block = l_c_m;
2006     }
2007
2008   /* Compute padding necessary to align output block.  */
2009   for (idx_t i = 0; i < n_specs; i++)
2010     {
2011       int fields_per_block = bytes_per_block / width_bytes[spec[i].size];
2012       int block_width = (spec[i].field_width + 1) * fields_per_block;
2013       if (width_per_block < block_width)
2014         width_per_block = block_width;
2015     }
2016   for (idx_t i = 0; i < n_specs; i++)
2017     {
2018       int fields_per_block = bytes_per_block / width_bytes[spec[i].size];
2019       int block_width = spec[i].field_width * fields_per_block;
2020       spec[i].pad_width = width_per_block - block_width;
2021     }
2022
2023 #ifdef DEBUG
2024   printf ("lcm=%d, width_per_block=%zu\n", l_c_m, width_per_block);
2025   for (idx_t i = 0; i < n_specs; i++)
2026     {
2027       int fields_per_block = bytes_per_block / width_bytes[spec[i].size];
2028       affirm (bytes_per_block % width_bytes[spec[i].size] == 0);
2029       affirm (1 <= spec[i].pad_width / fields_per_block);
2030       printf ("%d: fmt=\"%s\" in_width=%d out_width=%d pad=%d\n",
2031               i, spec[i].fmt_string, width_bytes[spec[i].size],
2032               spec[i].field_width, spec[i].pad_width);
2033     }
2034 #endif
2035
2036   ok &= (flag_dump_strings ? dump_strings () : dump ());
2037
2038 cleanup:
2039
2040   if (have_read_stdin && fclose (stdin) == EOF)
2041     error (EXIT_FAILURE, errno, _("standard input"));
2042
2043   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
2044 }