usr.bin/hexdump/parse.c

   1 /*      $NetBSD: parse.c,v 1.25 2009/01/17 23:24:30 hans Exp $  */
   2
   3 /*
   4  * Copyright (c) 1989, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  * 3. Neither the name of the University nor the names of its contributors
  16  *    may be used to endorse or promote products derived from this software
  17  *    without specific prior written permission.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  29  * SUCH DAMAGE.
  30  */
  31
  32 #if HAVE_NBTOOL_CONFIG_H
  33 #include "nbtool_config.h"
  34 #endif
  35
  36 #include <sys/cdefs.h>
  37 #if !defined(lint)
  38 #if 0
  39 static char sccsid[] = "@(#)parse.c     8.1 (Berkeley) 6/6/93";
  40 #else
  41 __RCSID("$NetBSD: parse.c,v 1.25 2009/01/17 23:24:30 hans Exp $");
  42 #endif
  43 #endif /* not lint */
  44
  45 #include <sys/types.h>
  46 #include <sys/file.h>
  47
  48 #include <ctype.h>
  49 #include <err.h>
  50 #include <errno.h>
  51 #include <fcntl.h>
  52 #include <inttypes.h>
  53 #include <stdio.h>
  54 #include <stdlib.h>
  55 #include <string.h>
  56 #include <util.h>
  57
  58 #include "hexdump.h"
  59
/*
 * Set by rewrite() to the format unit that contains a %_A conversion;
 * left untouched by every other function in this file.
 */
FU *endfu;                                      /* format at end-of-data */
  61
  62 void
  63 addfile(char *name)
  64 {
  65         char *p;
  66         FILE *fp;
  67         int ch;
  68         char buf[2048 + 1];
  69
  70         if ((fp = fopen(name, "r")) == NULL)
  71                 err(1, "fopen %s", name);
  72         while (fgets(buf, sizeof(buf), fp)) {
  73                 if (!(p = strchr(buf, '\n'))) {
  74                         warnx("line too long.");
  75                         while ((ch = getchar()) != '\n' && ch != EOF);
  76                         continue;
  77                 }
  78                 *p = '\0';
  79                 for (p = buf; *p && isspace((unsigned char)*p); ++p);
  80                 if (!*p || *p == '#')
  81                         continue;
  82                 add(p);
  83         }
  84         (void)fclose(fp);
  85 }
  86
  87 void
  88 add(const char *fmt)
  89 {
  90         const char *p;
  91         static FS **nextfs;
  92         FS *tfs;
  93         FU *tfu, **nextfu;
  94         const char *savep;
  95
  96         /* start new linked list of format units */
  97         tfs = ecalloc(1, sizeof(FS));
  98         if (!fshead)
  99                 fshead = tfs;
 100         else
 101                 *nextfs = tfs;
 102         nextfs = &tfs->nextfs;
 103         nextfu = &tfs->nextfu;
 104
 105         /* take the format string and break it up into format units */
 106         for (p = fmt;;) {
 107                 /* skip leading white space */
 108                 for (; isspace((unsigned char)*p); ++p);
 109                 if (!*p)
 110                         break;
 111
 112                 /* allocate a new format unit and link it in */
 113                 tfu = ecalloc(1, sizeof(FU));
 114                 *nextfu = tfu;
 115                 nextfu = &tfu->nextfu;
 116                 tfu->reps = 1;
 117
 118                 /* if leading digit, repetition count */
 119                 if (isdigit((unsigned char)*p)) {
 120                         for (savep = p; isdigit((unsigned char)*p); ++p);
 121                         if (!isspace((unsigned char)*p) && *p != '/')
 122                                 badfmt(fmt);
 123                         /* may overwrite either white space or slash */
 124                         tfu->reps = atoi(savep);
 125                         tfu->flags = F_SETREP;
 126                         /* skip trailing white space */
 127                         for (++p; isspace((unsigned char)*p); ++p);
 128                 }
 129
 130                 /* skip slash and trailing white space */
 131                 if (*p == '/')
 132                         while (isspace((unsigned char)*++p));
 133
 134                 /* byte count */
 135                 if (isdigit((unsigned char)*p)) {
 136                         for (savep = p; isdigit((unsigned char)*p); ++p);
 137                         if (!isspace((unsigned char)*p))
 138                                 badfmt(fmt);
 139                         tfu->bcnt = atoi(savep);
 140                         /* skip trailing white space */
 141                         for (++p; isspace((unsigned char)*p); ++p);
 142                 }
 143
 144                 /* format */
 145                 if (*p != '"')
 146                         badfmt(fmt);
 147                 for (savep = ++p; *p != '"';)
 148                         if (*p++ == 0)
 149                                 badfmt(fmt);
 150                 tfu->fmt = emalloc(p - savep + 1);
 151                 (void) strncpy(tfu->fmt, savep, p - savep);
 152                 tfu->fmt[p - savep] = '\0';
 153                 escape(tfu->fmt);
 154                 p++;
 155         }
 156 }
 157
/*
 * Characters that size() and rewrite() skip between a '%' and its
 * conversion character.  The '.' is deliberately first: scanning with
 * `spec + 1' stops at a '.', which lets the caller pick up the precision
 * that follows, while scanning with `spec' skips over the '.' as well.
 */
static const char *spec = ".#-+ 0123456789";
 159
 160 int
 161 size(FS *fs)
 162 {
 163         FU *fu;
 164         int bcnt, cursize;
 165         char *fmt;
 166         int prec;
 167
 168         /* figure out the data block size needed for each format unit */
 169         for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
 170                 if (fu->bcnt) {
 171                         cursize += fu->bcnt * fu->reps;
 172                         continue;
 173                 }
 174                 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
 175                         if (*fmt != '%')
 176                                 continue;
 177                         /*
 178                          * skip any special chars -- save precision in
 179                          * case it's a %s format.
 180                          */
 181                         while (strchr(spec + 1, *++fmt));
 182                         if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
 183                                 prec = atoi(fmt);
 184                                 while (isdigit((unsigned char)*++fmt));
 185                         }
 186                         switch(*fmt) {
 187                         case 'c':
 188                                 bcnt += 1;
 189                                 break;
 190                         case 'd': case 'i': case 'o': case 'u':
 191                         case 'x': case 'X':
 192                                 bcnt += 4;
 193                                 break;
 194                         case 'e': case 'E': case 'f': case 'g': case 'G':
 195                                 bcnt += 8;
 196                                 break;
 197                         case 's':
 198                                 bcnt += prec;
 199                                 break;
 200                         case '_':
 201                                 switch(*++fmt) {
 202                                 case 'c': case 'p': case 'u':
 203                                         bcnt += 1;
 204                                         break;
 205                                 }
 206                         }
 207                 }
 208                 cursize += bcnt * fu->reps;
 209         }
 210         return (cursize);
 211 }
 212
 213 void
 214 rewrite(FS *fs)
 215 {
 216         enum { NOTOKAY, USEBCNT, USEPREC } sokay;
 217         PR *pr, **nextpr;
 218         FU *fu;
 219         char *p1, *p2;
 220         char savech, *fmtp, cs[sizeof(PRId64)];
 221         int nconv, prec;
 222
 223         prec = 0;
 224         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 225                 /*
 226                  * Break each format unit into print units; each conversion
 227                  * character gets its own.
 228                  */
 229                 nextpr = &fu->nextpr;
 230                 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
 231                         pr = ecalloc(1, sizeof(*pr));
 232                         *nextpr = pr;
 233
 234                         /* Skip preceding text and up to the next % sign. */
 235                         for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
 236
 237                         /* Only text in the string. */
 238                         if (!*p1) {
 239                                 pr->fmt = fmtp;
 240                                 pr->flags = F_TEXT;
 241                                 break;
 242                         }
 243
 244                         /*
 245                          * Get precision for %s -- if have a byte count, don't
 246                          * need it.
 247                          */
 248                         if (fu->bcnt) {
 249                                 sokay = USEBCNT;
 250                                 /* Skip to conversion character. */
 251                                 for (++p1; *p1 && strchr(spec, *p1); ++p1);
 252                         } else {
 253                                 /* Skip any special chars, field width. */
 254                                 while (*++p1 && strchr(spec + 1, *p1));
 255                                 if (*p1 == '.' &&
 256                                     isdigit((unsigned char)*++p1)) {
 257                                         sokay = USEPREC;
 258                                         prec = atoi(p1);
 259                                         while (isdigit((unsigned char)*++p1))
 260                                                 continue;
 261                                 } else
 262                                         sokay = NOTOKAY;
 263                         }
 264
 265                         p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */
 266                         cs[0] = *p1;            /* Set conversion string. */
 267                         cs[1] = '\0';
 268
 269                         /*
 270                          * Figure out the byte count for each conversion;
 271                          * rewrite the format as necessary, set up blank-
 272                          * padding for end of data.
 273                          */
 274                         switch(cs[0]) {
 275                         case 'c':
 276                                 pr->flags = F_CHAR;
 277                                 switch(fu->bcnt) {
 278                                 case 0: case 1:
 279                                         pr->bcnt = 1;
 280                                         break;
 281                                 default:
 282                                         p1[1] = '\0';
 283                                         badcnt(p1);
 284                                 }
 285                                 break;
 286                         case 'd': case 'i':
 287                                 pr->flags = F_INT;
 288                                 goto isint;
 289                         case 'o': case 'u': case 'x': case 'X':
 290                                 pr->flags = F_UINT;
 291 isint:
 292                                 /*
 293                                  * Regardless of pr->bcnt, all integer
 294                                  * values are cast to [u]int64_t before
 295                                  * being printed by display().  We
 296                                  * therefore need to use PRI?64 as the
 297                                  * format, where '?' could actually
 298                                  * be any of [diouxX].  We make the
 299                                  * assumption (not guaranteed by the
 300                                  * C99 standard) that we can derive
 301                                  * all the other PRI?64 values from
 302                                  * PRId64 simply by changing the last
 303                                  * character.  For example, if PRId64 is
 304                                  * "lld" or "qd", and cs[0] is 'o', then
 305                                  * we end up with "llo" or "qo".
 306                                  */
 307                                 savech = cs[0];
 308                                 strncpy(cs, PRId64, sizeof(PRId64) - 2);
 309                                 cs[sizeof(PRId64) - 2] = savech;
 310                                 cs[sizeof(PRId64) - 1] = '\0';
 311                                 switch(fu->bcnt) {
 312                                 case 0: case 4:
 313                                         pr->bcnt = 4;
 314                                         break;
 315                                 case 1:
 316                                         pr->bcnt = 1;
 317                                         break;
 318                                 case 2:
 319                                         pr->bcnt = 2;
 320                                         break;
 321                                 case 8:
 322                                         pr->bcnt = 8;
 323                                         break;
 324                                 default:
 325                                         p1[1] = '\0';
 326                                         badcnt(p1);
 327                                 }
 328                                 break;
 329                         case 'e': case 'E': case 'f': case 'g': case 'G':
 330                                 pr->flags = F_DBL;
 331                                 switch(fu->bcnt) {
 332                                 case 0: case 8:
 333                                         pr->bcnt = 8;
 334                                         break;
 335                                 case 4:
 336                                         pr->bcnt = 4;
 337                                         break;
 338                                 default:
 339                                         p1[1] = '\0';
 340                                         badcnt(p1);
 341                                 }
 342                                 break;
 343                         case 's':
 344                                 pr->flags = F_STR;
 345                                 switch(sokay) {
 346                                 case NOTOKAY:
 347                                         badsfmt();
 348                                 case USEBCNT:
 349                                         pr->bcnt = fu->bcnt;
 350                                         break;
 351                                 case USEPREC:
 352                                         pr->bcnt = prec;
 353                                         break;
 354                                 }
 355                                 break;
 356                         case '_':
 357                                 ++p2;
 358                                 switch(p1[1]) {
 359                                 case 'A':
 360                                         endfu = fu;
 361                                         fu->flags |= F_IGNORE;
 362                                         /* FALLTHROUGH */
 363                                 case 'a':
 364                                         pr->flags = F_ADDRESS;
 365                                         ++p2;
 366                                         switch(p1[2]) {
 367                                         case 'd': case 'o': case'x':
 368                                                 /*
 369                                                  * See comments above for
 370                                                  * the way we use PRId64.
 371                                                  */
 372                                                 strncpy(cs, PRId64,
 373                                                         sizeof(PRId64) - 2);
 374                                                 cs[sizeof(PRId64) - 2] = p1[2];
 375                                                 cs[sizeof(PRId64) - 1] = '\0';
 376                                                 break;
 377                                         default:
 378                                                 p1[3] = '\0';
 379                                                 badconv(p1);
 380                                         }
 381                                         break;
 382                                 case 'c':
 383                                         pr->flags = F_C;
 384                                         /* cs[0] = 'c'; set in conv_c */
 385                                         goto isint2;
 386                                 case 'p':
 387                                         pr->flags = F_P;
 388                                         cs[0] = 'c';
 389                                         goto isint2;
 390                                 case 'u':
 391                                         pr->flags = F_U;
 392                                         /* cs[0] = 'c'; set in conv_u */
 393 isint2:                                 switch(fu->bcnt) {
 394                                         case 0: case 1:
 395                                                 pr->bcnt = 1;
 396                                                 break;
 397                                         default:
 398                                                 p1[2] = '\0';
 399                                                 badcnt(p1);
 400                                         }
 401                                         break;
 402                                 default:
 403                                         p1[2] = '\0';
 404                                         badconv(p1);
 405                                 }
 406                                 break;
 407                         default:
 408                                 p1[1] = '\0';
 409                                 badconv(p1);
 410                         }
 411
 412                         /*
 413                          * Copy to PR format string, set conversion character
 414                          * pointer, update original.
 415                          */
 416                         savech = *p2;
 417                         p1[0] = '\0';
 418                         pr->fmt = emalloc(strlen(fmtp) + strlen(cs) + 1);
 419                         (void)strcpy(pr->fmt, fmtp);
 420                         (void)strcat(pr->fmt, cs);
 421                         *p2 = savech;
 422                         pr->cchar = pr->fmt + (p1 - fmtp);
 423                         fmtp = p2;
 424
 425                         /* Only one conversion character if byte count. */
 426                         if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
 427                                 errx(1,
 428                             "byte count with multiple conversion characters");
 429                 }
 430                 /*
 431                  * If format unit byte count not specified, figure it out
 432                  * so can adjust rep count later.
 433                  */
 434                 if (!fu->bcnt)
 435                         for (pr = fu->nextpr; pr; pr = pr->nextpr)
 436                                 fu->bcnt += pr->bcnt;
 437         }
 438         /*
 439          * If the format string interprets any data at all, and it's
 440          * not the same as the blocksize, and its last format unit
 441          * interprets any data at all, and has no iteration count,
 442          * repeat it as necessary.
 443          *
 444          * If, rep count is greater than 1, no trailing whitespace
 445          * gets output from the last iteration of the format unit.
 446          */
 447         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 448                 if (!fu->nextfu && fs->bcnt < blocksize &&
 449                     !(fu->flags&F_SETREP) && fu->bcnt)
 450                         fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
 451                 if (fu->reps > 1) {
 452                         if (!fu->nextpr)
 453                                 break;
 454                         for (pr = fu->nextpr;; pr = pr->nextpr)
 455                                 if (!pr->nextpr)
 456                                         break;
 457                         for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
 458                                 p2 = isspace((unsigned char)*p1) ? p1 : NULL;
 459                         if (p2)
 460                                 pr->nospace = p2;
 461                 }
 462         }
 463 #ifdef DEBUG
 464         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 465                 (void)printf("fmt:");
 466                 for (pr = fu->nextpr; pr; pr = pr->nextpr)
 467                         (void)printf(" {%s}", pr->fmt);
 468                 (void)printf("\n");
 469         }
 470 #endif
 471 }
 472
 473 void
 474 escape(char *p1)
 475 {
 476         char *p2;
 477
 478         /* alphabetic escape sequences have to be done in place */
 479         for (p2 = p1;; ++p1, ++p2) {
 480                 if (!*p1) {
 481                         *p2 = *p1;
 482                         break;
 483                 }
 484                 if (*p1 == '\\')
 485                         switch(*++p1) {
 486                         case '\0':
 487                                 *p2 = '\\';
 488                                 *++p2 = '\0';
 489                                 return; /* incomplete escape sequence */
 490                         case 'a':
 491                              /* *p2 = '\a'; */
 492                                 *p2 = '\007';
 493                                 break;
 494                         case 'b':
 495                                 *p2 = '\b';
 496                                 break;
 497                         case 'f':
 498                                 *p2 = '\f';
 499                                 break;
 500                         case 'n':
 501                                 *p2 = '\n';
 502                                 break;
 503                         case 'r':
 504                                 *p2 = '\r';
 505                                 break;
 506                         case 't':
 507                                 *p2 = '\t';
 508                                 break;
 509                         case 'v':
 510                                 *p2 = '\v';
 511                                 break;
 512                         default:
 513                                 *p2 = *p1;
 514                                 break;
 515                         }
 516                 else
 517                         *p2 = *p1;
 518         }
 519 }
 520
/*
 * badcnt --
 *      Report that the conversion specification `s' was given an
 *      unsupported byte count and terminate via errx() with status 1.
 */
void
badcnt(char *s)
{
        errx(1, "%s: bad byte count", s);
}
 526
/*
 * badsfmt --
 *      Report a %s conversion that has neither a precision nor a byte
 *      count and terminate via errx() with status 1.
 */
void
badsfmt(void)
{
        errx(1, "%%s: requires a precision or a byte count");
}
 532
/*
 * badfmt --
 *      Report that the format string `fmt' is malformed and terminate
 *      via errx() with status 1.
 */
void
badfmt(const char *fmt)
{
        errx(1, "\"%s\": bad format", fmt);
}
 538
/*
 * badconv --
 *      Report an unsupported conversion and terminate via errx() with
 *      status 1.  `ch' is the conversion text without its leading '%';
 *      the message supplies the '%' itself.
 */
void
badconv(char *ch)
{
        errx(1, "%%%s: bad conversion character", ch);
}