Obey HAVE_GOLD=no to no intent to build gold
[minix3.git] / usr.bin / hexdump / parse.c
blobe29d966a566be90ab777764b6949f8b14dcaf300
1 /* $NetBSD: parse.c,v 1.27 2011/09/04 20:27:27 joerg Exp $ */
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
36 #include <sys/cdefs.h>
37 #if !defined(lint)
38 #if 0
39 static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93";
40 #else
41 __RCSID("$NetBSD: parse.c,v 1.27 2011/09/04 20:27:27 joerg Exp $");
42 #endif
43 #endif /* not lint */
45 #include <sys/types.h>
46 #include <sys/file.h>
48 #include <ctype.h>
49 #include <err.h>
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <inttypes.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <util.h>
58 #include "hexdump.h"
/*
 * Error helpers defined at the end of this file; each one reports its
 * message with errx(1, ...).
 */
60 __dead static void badcnt(char *);
61 __dead static void badconv(char *);
62 __dead static void badfmt(const char *);
63 __dead static void badsfmt(void);
/* Set by rewrite() to the format unit that contains a %_A conversion. */
65 FU *endfu; /* format at end-of-data */
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and hand
 *	each of them to add().  Leading white space is skipped; empty lines
 *	and lines whose first non-blank character is '#' are ignored.
 *
 *	A line that does not fit into the local buffer is reported with
 *	warnx() and skipped.  The remainder of such a line is consumed from
 *	the format file itself (not from standard input), so parsing
 *	resumes at the start of the next line.  A final line that simply
 *	lacks a trailing newline is accepted as a normal line.
 *
 *	Exits through err() if the file cannot be opened.
 */
void
addfile(char *name)
{
	char *p;
	FILE *fp;
	int ch;
	char buf[2048 + 1];

	if ((fp = fopen(name, "r")) == NULL)
		err(1, "fopen %s", name);
	while (fgets(buf, sizeof(buf), fp)) {
		if ((p = strchr(buf, '\n')) != NULL)
			*p = '\0';
		else if (!feof(fp)) {
			/*
			 * No newline and not at end of file: the line was
			 * truncated by fgets().  Drop what is left of it.
			 */
			warnx("line too long.");
			while ((ch = getc(fp)) != '\n' && ch != EOF)
				continue;
			continue;
		}
		/* else: last line of the file, not newline terminated */

		for (p = buf; *p && isspace((unsigned char)*p); ++p)
			continue;
		if (!*p || *p == '#')
			continue;
		add(p);
	}
	(void)fclose(fp);
}
92 void
93 add(const char *fmt)
95 const char *p;
96 static FS **nextfs;
97 FS *tfs;
98 FU *tfu, **nextfu;
99 const char *savep;
101 /* start new linked list of format units */
102 tfs = ecalloc(1, sizeof(FS));
103 if (!fshead)
104 fshead = tfs;
105 else
106 *nextfs = tfs;
107 nextfs = &tfs->nextfs;
108 nextfu = &tfs->nextfu;
110 /* take the format string and break it up into format units */
111 for (p = fmt;;) {
112 /* skip leading white space */
113 for (; isspace((unsigned char)*p); ++p);
114 if (!*p)
115 break;
117 /* allocate a new format unit and link it in */
118 tfu = ecalloc(1, sizeof(FU));
119 *nextfu = tfu;
120 nextfu = &tfu->nextfu;
121 tfu->reps = 1;
123 /* if leading digit, repetition count */
124 if (isdigit((unsigned char)*p)) {
125 for (savep = p; isdigit((unsigned char)*p); ++p);
126 if (!isspace((unsigned char)*p) && *p != '/')
127 badfmt(fmt);
128 /* may overwrite either white space or slash */
129 tfu->reps = atoi(savep);
130 tfu->flags = F_SETREP;
131 /* skip trailing white space */
132 for (++p; isspace((unsigned char)*p); ++p);
135 /* skip slash and trailing white space */
136 if (*p == '/')
137 while (isspace((unsigned char)*++p));
139 /* byte count */
140 if (isdigit((unsigned char)*p)) {
141 for (savep = p; isdigit((unsigned char)*p); ++p);
142 if (!isspace((unsigned char)*p))
143 badfmt(fmt);
144 tfu->bcnt = atoi(savep);
145 /* skip trailing white space */
146 for (++p; isspace((unsigned char)*p); ++p);
149 /* format */
150 if (*p != '"')
151 badfmt(fmt);
152 for (savep = ++p; *p != '"';)
153 if (*p++ == 0)
154 badfmt(fmt);
155 tfu->fmt = emalloc(p - savep + 1);
156 (void) strncpy(tfu->fmt, savep, p - savep);
157 tfu->fmt[p - savep] = '\0';
158 escape(tfu->fmt);
159 p++;
163 static const char *spec = ".#-+ 0123456789";
166 size(FS *fs)
168 FU *fu;
169 int bcnt, cursize;
170 char *fmt;
171 int prec;
173 /* figure out the data block size needed for each format unit */
174 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
175 if (fu->bcnt) {
176 cursize += fu->bcnt * fu->reps;
177 continue;
179 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
180 if (*fmt != '%')
181 continue;
183 * skip any special chars -- save precision in
184 * case it's a %s format.
186 while (strchr(spec + 1, *++fmt));
187 if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
188 prec = atoi(fmt);
189 while (isdigit((unsigned char)*++fmt));
191 switch(*fmt) {
192 case 'c':
193 bcnt += 1;
194 break;
195 case 'd': case 'i': case 'o': case 'u':
196 case 'x': case 'X':
197 bcnt += 4;
198 break;
199 case 'e': case 'E': case 'f': case 'g': case 'G':
200 bcnt += 8;
201 break;
202 case 's':
203 bcnt += prec;
204 break;
205 case '_':
206 switch(*++fmt) {
207 case 'c': case 'p': case 'u':
208 bcnt += 1;
209 break;
213 cursize += bcnt * fu->reps;
215 return (cursize);
218 void
219 rewrite(FS *fs)
221 enum { NOTOKAY, USEBCNT, USEPREC } sokay;
222 PR *pr, **nextpr;
223 FU *fu;
224 char *p1, *p2;
225 char savech, *fmtp, cs[sizeof(PRId64)];
226 int nconv, prec;
228 prec = 0;
229 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
231 * Break each format unit into print units; each conversion
232 * character gets its own.
234 nextpr = &fu->nextpr;
235 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
236 pr = ecalloc(1, sizeof(*pr));
237 *nextpr = pr;
239 /* Skip preceding text and up to the next % sign. */
240 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
242 /* Only text in the string. */
243 if (!*p1) {
244 pr->fmt = fmtp;
245 pr->flags = F_TEXT;
246 break;
250 * Get precision for %s -- if have a byte count, don't
251 * need it.
253 if (fu->bcnt) {
254 sokay = USEBCNT;
255 /* Skip to conversion character. */
256 for (++p1; *p1 && strchr(spec, *p1); ++p1);
257 } else {
258 /* Skip any special chars, field width. */
259 while (*++p1 && strchr(spec + 1, *p1));
260 if (*p1 == '.' &&
261 isdigit((unsigned char)*++p1)) {
262 sokay = USEPREC;
263 prec = atoi(p1);
264 while (isdigit((unsigned char)*++p1))
265 continue;
266 } else
267 sokay = NOTOKAY;
270 p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */
271 cs[0] = *p1; /* Set conversion string. */
272 cs[1] = '\0';
275 * Figure out the byte count for each conversion;
276 * rewrite the format as necessary, set up blank-
277 * padding for end of data.
279 switch(cs[0]) {
280 case 'c':
281 pr->flags = F_CHAR;
282 switch(fu->bcnt) {
283 case 0: case 1:
284 pr->bcnt = 1;
285 break;
286 default:
287 p1[1] = '\0';
288 badcnt(p1);
290 break;
291 case 'd': case 'i':
292 pr->flags = F_INT;
293 goto isint;
294 case 'o': case 'u': case 'x': case 'X':
295 pr->flags = F_UINT;
296 isint:
298 * Regardless of pr->bcnt, all integer
299 * values are cast to [u]int64_t before
300 * being printed by display(). We
301 * therefore need to use PRI?64 as the
302 * format, where '?' could actually
303 * be any of [diouxX]. We make the
304 * assumption (not guaranteed by the
305 * C99 standard) that we can derive
306 * all the other PRI?64 values from
307 * PRId64 simply by changing the last
308 * character. For example, if PRId64 is
309 * "lld" or "qd", and cs[0] is 'o', then
310 * we end up with "llo" or "qo".
312 savech = cs[0];
313 strncpy(cs, PRId64, sizeof(PRId64) - 2);
314 cs[sizeof(PRId64) - 2] = savech;
315 cs[sizeof(PRId64) - 1] = '\0';
316 switch(fu->bcnt) {
317 case 0: case 4:
318 pr->bcnt = 4;
319 break;
320 case 1:
321 pr->bcnt = 1;
322 break;
323 case 2:
324 pr->bcnt = 2;
325 break;
326 case 8:
327 pr->bcnt = 8;
328 break;
329 default:
330 p1[1] = '\0';
331 badcnt(p1);
333 break;
334 case 'e': case 'E': case 'f': case 'g': case 'G':
335 pr->flags = F_DBL;
336 switch(fu->bcnt) {
337 case 0: case 8:
338 pr->bcnt = 8;
339 break;
340 case 4:
341 pr->bcnt = 4;
342 break;
343 default:
344 p1[1] = '\0';
345 badcnt(p1);
347 break;
348 case 's':
349 pr->flags = F_STR;
350 switch(sokay) {
351 case NOTOKAY:
352 badsfmt();
353 case USEBCNT:
354 pr->bcnt = fu->bcnt;
355 break;
356 case USEPREC:
357 pr->bcnt = prec;
358 break;
360 break;
361 case '_':
362 ++p2;
363 switch(p1[1]) {
364 case 'A':
365 endfu = fu;
366 fu->flags |= F_IGNORE;
367 /* FALLTHROUGH */
368 case 'a':
369 pr->flags = F_ADDRESS;
370 ++p2;
371 switch(p1[2]) {
372 case 'd': case 'o': case'x':
374 * See comments above for
375 * the way we use PRId64.
377 strncpy(cs, PRId64,
378 sizeof(PRId64) - 2);
379 cs[sizeof(PRId64) - 2] = p1[2];
380 cs[sizeof(PRId64) - 1] = '\0';
381 break;
382 default:
383 p1[3] = '\0';
384 badconv(p1);
386 break;
387 case 'c':
388 pr->flags = F_C;
389 /* cs[0] = 'c'; set in conv_c */
390 goto isint2;
391 case 'p':
392 pr->flags = F_P;
393 cs[0] = 'c';
394 goto isint2;
395 case 'u':
396 pr->flags = F_U;
397 /* cs[0] = 'c'; set in conv_u */
398 isint2: switch(fu->bcnt) {
399 case 0: case 1:
400 pr->bcnt = 1;
401 break;
402 default:
403 p1[2] = '\0';
404 badcnt(p1);
406 break;
407 default:
408 p1[2] = '\0';
409 badconv(p1);
411 break;
412 default:
413 p1[1] = '\0';
414 badconv(p1);
418 * Copy to PR format string, set conversion character
419 * pointer, update original.
421 savech = *p2;
422 p1[0] = '\0';
423 pr->fmt = emalloc(strlen(fmtp) + strlen(cs) + 1);
424 (void)strcpy(pr->fmt, fmtp);
425 (void)strcat(pr->fmt, cs);
426 *p2 = savech;
427 pr->cchar = pr->fmt + (p1 - fmtp);
428 fmtp = p2;
430 /* Only one conversion character if byte count. */
431 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
432 errx(1,
433 "byte count with multiple conversion characters");
436 * If format unit byte count not specified, figure it out
437 * so can adjust rep count later.
439 if (!fu->bcnt)
440 for (pr = fu->nextpr; pr; pr = pr->nextpr)
441 fu->bcnt += pr->bcnt;
444 * If the format string interprets any data at all, and it's
445 * not the same as the blocksize, and its last format unit
446 * interprets any data at all, and has no iteration count,
447 * repeat it as necessary.
449 * If, rep count is greater than 1, no trailing whitespace
450 * gets output from the last iteration of the format unit.
452 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
453 if (!fu->nextfu && fs->bcnt < blocksize &&
454 !(fu->flags&F_SETREP) && fu->bcnt)
455 fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
456 if (fu->reps > 1) {
457 if (!fu->nextpr)
458 break;
459 for (pr = fu->nextpr;; pr = pr->nextpr)
460 if (!pr->nextpr)
461 break;
462 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
463 p2 = isspace((unsigned char)*p1) ? p1 : NULL;
464 if (p2)
465 pr->nospace = p2;
468 #ifdef DEBUG
469 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
470 (void)printf("fmt:");
471 for (pr = fu->nextpr; pr; pr = pr->nextpr)
472 (void)printf(" {%s}", pr->fmt);
473 (void)printf("\n");
475 #endif
/*
 * escape --
 *	Replace the backslash escape sequences in the string `p1', in
 *	place.  \a, \b, \f, \n, \r, \t and \v become the matching control
 *	character; a backslash before any other character just yields
 *	that character.  A lone backslash at the very end of the string
 *	is kept as it is.
 */
void
escape(char *p1)
{
	const char *src;
	char *dst;
	char ch;

	/* The result is never longer than the input, so reuse the buffer. */
	src = dst = p1;
	while (*src != '\0') {
		ch = *src++;
		if (ch == '\\') {
			ch = *src;
			if (ch == '\0') {
				/* incomplete escape sequence */
				*dst++ = '\\';
				break;
			}
			++src;
			switch (ch) {
			case 'a':
				ch = '\007';	/* i.e. '\a' */
				break;
			case 'b':
				ch = '\b';
				break;
			case 'f':
				ch = '\f';
				break;
			case 'n':
				ch = '\n';
				break;
			case 'r':
				ch = '\r';
				break;
			case 't':
				ch = '\t';
				break;
			case 'v':
				ch = '\v';
				break;
			default:
				/* unknown escape: the character stands for itself */
				break;
			}
		}
		*dst++ = ch;
	}
	*dst = '\0';
}
/*
 * badcnt --
 *	Complain that the byte count given for the conversion text `s'
 *	is not usable and exit with status 1.
 */
static void
badcnt(char *s)
{
	errx(1, "%s: bad byte count", s);
}
/*
 * badsfmt --
 *	Complain about a %s conversion that has neither a precision nor
 *	a byte count and exit with status 1.
 */
static void
badsfmt(void)
{
	errx(1, "%%s: requires a precision or a byte count");
}
/*
 * badfmt --
 *	Complain that the format string `fmt' is malformed and exit with
 *	status 1.
 */
static void
badfmt(const char *fmt)
{
	errx(1, "\"%s\": bad format", fmt);
}
/*
 * badconv --
 *	Complain that `ch', the text following a '%', is not a known
 *	conversion and exit with status 1.
 */
static void
badconv(char *ch)
{
	errx(1, "%%%s: bad conversion character", ch);
}