Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / usr.bin / hexdump / parse.c
blob9052a1e5ec21564df52c9679c868399879ede828
1 /* $NetBSD: parse.c,v 1.25 2009/01/17 23:24:30 hans Exp $ */
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
36 #include <sys/cdefs.h>
37 #if !defined(lint)
38 #if 0
39 static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93";
40 #else
41 __RCSID("$NetBSD: parse.c,v 1.25 2009/01/17 23:24:30 hans Exp $");
42 #endif
43 #endif /* not lint */
45 #include <sys/types.h>
46 #include <sys/file.h>
48 #include <ctype.h>
49 #include <err.h>
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <inttypes.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <util.h>
58 #include "hexdump.h"
60 FU *endfu; /* format at end-of-data */
/*
 * Read format strings from the file `name', one per line, and pass each
 * one to add().  Leading white space is skipped; empty lines and lines
 * whose first non-blank character is '#' are ignored.
 *
 * Lines that do not fit in the buffer are reported with a warning and
 * discarded.  Exits via err() if the file cannot be opened.
 */
void
addfile(char *name)
{
	char *p;
	FILE *fp;
	int ch;
	char buf[2048 + 1];

	if ((fp = fopen(name, "r")) == NULL)
		err(1, "fopen %s", name);
	while (fgets(buf, sizeof(buf), fp)) {
		if ((p = strchr(buf, '\n')) != NULL) {
			*p = '\0';
		} else if (strlen(buf) == sizeof(buf) - 1) {
			/*
			 * The buffer filled up before a newline was seen.
			 * Discard the remainder of the line from the file
			 * being read (not from standard input).
			 */
			warnx("line too long.");
			while ((ch = getc(fp)) != '\n' && ch != EOF)
				continue;
			continue;
		}
		/*
		 * Otherwise the line was short and simply lacks a newline
		 * (last line of the file); treat it like any other line.
		 */
		for (p = buf; *p && isspace((unsigned char)*p); ++p)
			continue;
		if (!*p || *p == '#')
			continue;
		add(p);
	}
	(void)fclose(fp);
}
87 void
88 add(const char *fmt)
90 const char *p;
91 static FS **nextfs;
92 FS *tfs;
93 FU *tfu, **nextfu;
94 const char *savep;
96 /* start new linked list of format units */
97 tfs = ecalloc(1, sizeof(FS));
98 if (!fshead)
99 fshead = tfs;
100 else
101 *nextfs = tfs;
102 nextfs = &tfs->nextfs;
103 nextfu = &tfs->nextfu;
105 /* take the format string and break it up into format units */
106 for (p = fmt;;) {
107 /* skip leading white space */
108 for (; isspace((unsigned char)*p); ++p);
109 if (!*p)
110 break;
112 /* allocate a new format unit and link it in */
113 tfu = ecalloc(1, sizeof(FU));
114 *nextfu = tfu;
115 nextfu = &tfu->nextfu;
116 tfu->reps = 1;
118 /* if leading digit, repetition count */
119 if (isdigit((unsigned char)*p)) {
120 for (savep = p; isdigit((unsigned char)*p); ++p);
121 if (!isspace((unsigned char)*p) && *p != '/')
122 badfmt(fmt);
123 /* may overwrite either white space or slash */
124 tfu->reps = atoi(savep);
125 tfu->flags = F_SETREP;
126 /* skip trailing white space */
127 for (++p; isspace((unsigned char)*p); ++p);
130 /* skip slash and trailing white space */
131 if (*p == '/')
132 while (isspace((unsigned char)*++p));
134 /* byte count */
135 if (isdigit((unsigned char)*p)) {
136 for (savep = p; isdigit((unsigned char)*p); ++p);
137 if (!isspace((unsigned char)*p))
138 badfmt(fmt);
139 tfu->bcnt = atoi(savep);
140 /* skip trailing white space */
141 for (++p; isspace((unsigned char)*p); ++p);
144 /* format */
145 if (*p != '"')
146 badfmt(fmt);
147 for (savep = ++p; *p != '"';)
148 if (*p++ == 0)
149 badfmt(fmt);
150 tfu->fmt = emalloc(p - savep + 1);
151 (void) strncpy(tfu->fmt, savep, p - savep);
152 tfu->fmt[p - savep] = '\0';
153 escape(tfu->fmt);
154 p++;
158 static const char *spec = ".#-+ 0123456789";
161 size(FS *fs)
163 FU *fu;
164 int bcnt, cursize;
165 char *fmt;
166 int prec;
168 /* figure out the data block size needed for each format unit */
169 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
170 if (fu->bcnt) {
171 cursize += fu->bcnt * fu->reps;
172 continue;
174 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
175 if (*fmt != '%')
176 continue;
178 * skip any special chars -- save precision in
179 * case it's a %s format.
181 while (strchr(spec + 1, *++fmt));
182 if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
183 prec = atoi(fmt);
184 while (isdigit((unsigned char)*++fmt));
186 switch(*fmt) {
187 case 'c':
188 bcnt += 1;
189 break;
190 case 'd': case 'i': case 'o': case 'u':
191 case 'x': case 'X':
192 bcnt += 4;
193 break;
194 case 'e': case 'E': case 'f': case 'g': case 'G':
195 bcnt += 8;
196 break;
197 case 's':
198 bcnt += prec;
199 break;
200 case '_':
201 switch(*++fmt) {
202 case 'c': case 'p': case 'u':
203 bcnt += 1;
204 break;
208 cursize += bcnt * fu->reps;
210 return (cursize);
213 void
214 rewrite(FS *fs)
216 enum { NOTOKAY, USEBCNT, USEPREC } sokay;
217 PR *pr, **nextpr;
218 FU *fu;
219 char *p1, *p2;
220 char savech, *fmtp, cs[sizeof(PRId64)];
221 int nconv, prec;
223 prec = 0;
224 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
226 * Break each format unit into print units; each conversion
227 * character gets its own.
229 nextpr = &fu->nextpr;
230 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
231 pr = ecalloc(1, sizeof(*pr));
232 *nextpr = pr;
234 /* Skip preceding text and up to the next % sign. */
235 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
237 /* Only text in the string. */
238 if (!*p1) {
239 pr->fmt = fmtp;
240 pr->flags = F_TEXT;
241 break;
245 * Get precision for %s -- if have a byte count, don't
246 * need it.
248 if (fu->bcnt) {
249 sokay = USEBCNT;
250 /* Skip to conversion character. */
251 for (++p1; *p1 && strchr(spec, *p1); ++p1);
252 } else {
253 /* Skip any special chars, field width. */
254 while (*++p1 && strchr(spec + 1, *p1));
255 if (*p1 == '.' &&
256 isdigit((unsigned char)*++p1)) {
257 sokay = USEPREC;
258 prec = atoi(p1);
259 while (isdigit((unsigned char)*++p1))
260 continue;
261 } else
262 sokay = NOTOKAY;
265 p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */
266 cs[0] = *p1; /* Set conversion string. */
267 cs[1] = '\0';
270 * Figure out the byte count for each conversion;
271 * rewrite the format as necessary, set up blank-
272 * padding for end of data.
274 switch(cs[0]) {
275 case 'c':
276 pr->flags = F_CHAR;
277 switch(fu->bcnt) {
278 case 0: case 1:
279 pr->bcnt = 1;
280 break;
281 default:
282 p1[1] = '\0';
283 badcnt(p1);
285 break;
286 case 'd': case 'i':
287 pr->flags = F_INT;
288 goto isint;
289 case 'o': case 'u': case 'x': case 'X':
290 pr->flags = F_UINT;
291 isint:
293 * Regardless of pr->bcnt, all integer
294 * values are cast to [u]int64_t before
295 * being printed by display(). We
296 * therefore need to use PRI?64 as the
297 * format, where '?' could actually
298 * be any of [diouxX]. We make the
299 * assumption (not guaranteed by the
300 * C99 standard) that we can derive
301 * all the other PRI?64 values from
302 * PRId64 simply by changing the last
303 * character. For example, if PRId64 is
304 * "lld" or "qd", and cs[0] is 'o', then
305 * we end up with "llo" or "qo".
307 savech = cs[0];
308 strncpy(cs, PRId64, sizeof(PRId64) - 2);
309 cs[sizeof(PRId64) - 2] = savech;
310 cs[sizeof(PRId64) - 1] = '\0';
311 switch(fu->bcnt) {
312 case 0: case 4:
313 pr->bcnt = 4;
314 break;
315 case 1:
316 pr->bcnt = 1;
317 break;
318 case 2:
319 pr->bcnt = 2;
320 break;
321 case 8:
322 pr->bcnt = 8;
323 break;
324 default:
325 p1[1] = '\0';
326 badcnt(p1);
328 break;
329 case 'e': case 'E': case 'f': case 'g': case 'G':
330 pr->flags = F_DBL;
331 switch(fu->bcnt) {
332 case 0: case 8:
333 pr->bcnt = 8;
334 break;
335 case 4:
336 pr->bcnt = 4;
337 break;
338 default:
339 p1[1] = '\0';
340 badcnt(p1);
342 break;
343 case 's':
344 pr->flags = F_STR;
345 switch(sokay) {
346 case NOTOKAY:
347 badsfmt();
348 case USEBCNT:
349 pr->bcnt = fu->bcnt;
350 break;
351 case USEPREC:
352 pr->bcnt = prec;
353 break;
355 break;
356 case '_':
357 ++p2;
358 switch(p1[1]) {
359 case 'A':
360 endfu = fu;
361 fu->flags |= F_IGNORE;
362 /* FALLTHROUGH */
363 case 'a':
364 pr->flags = F_ADDRESS;
365 ++p2;
366 switch(p1[2]) {
367 case 'd': case 'o': case'x':
369 * See comments above for
370 * the way we use PRId64.
372 strncpy(cs, PRId64,
373 sizeof(PRId64) - 2);
374 cs[sizeof(PRId64) - 2] = p1[2];
375 cs[sizeof(PRId64) - 1] = '\0';
376 break;
377 default:
378 p1[3] = '\0';
379 badconv(p1);
381 break;
382 case 'c':
383 pr->flags = F_C;
384 /* cs[0] = 'c'; set in conv_c */
385 goto isint2;
386 case 'p':
387 pr->flags = F_P;
388 cs[0] = 'c';
389 goto isint2;
390 case 'u':
391 pr->flags = F_U;
392 /* cs[0] = 'c'; set in conv_u */
393 isint2: switch(fu->bcnt) {
394 case 0: case 1:
395 pr->bcnt = 1;
396 break;
397 default:
398 p1[2] = '\0';
399 badcnt(p1);
401 break;
402 default:
403 p1[2] = '\0';
404 badconv(p1);
406 break;
407 default:
408 p1[1] = '\0';
409 badconv(p1);
413 * Copy to PR format string, set conversion character
414 * pointer, update original.
416 savech = *p2;
417 p1[0] = '\0';
418 pr->fmt = emalloc(strlen(fmtp) + strlen(cs) + 1);
419 (void)strcpy(pr->fmt, fmtp);
420 (void)strcat(pr->fmt, cs);
421 *p2 = savech;
422 pr->cchar = pr->fmt + (p1 - fmtp);
423 fmtp = p2;
425 /* Only one conversion character if byte count. */
426 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
427 errx(1,
428 "byte count with multiple conversion characters");
431 * If format unit byte count not specified, figure it out
432 * so can adjust rep count later.
434 if (!fu->bcnt)
435 for (pr = fu->nextpr; pr; pr = pr->nextpr)
436 fu->bcnt += pr->bcnt;
439 * If the format string interprets any data at all, and it's
440 * not the same as the blocksize, and its last format unit
441 * interprets any data at all, and has no iteration count,
442 * repeat it as necessary.
444 * If, rep count is greater than 1, no trailing whitespace
445 * gets output from the last iteration of the format unit.
447 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
448 if (!fu->nextfu && fs->bcnt < blocksize &&
449 !(fu->flags&F_SETREP) && fu->bcnt)
450 fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
451 if (fu->reps > 1) {
452 if (!fu->nextpr)
453 break;
454 for (pr = fu->nextpr;; pr = pr->nextpr)
455 if (!pr->nextpr)
456 break;
457 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
458 p2 = isspace((unsigned char)*p1) ? p1 : NULL;
459 if (p2)
460 pr->nospace = p2;
463 #ifdef DEBUG
464 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
465 (void)printf("fmt:");
466 for (pr = fu->nextpr; pr; pr = pr->nextpr)
467 (void)printf(" {%s}", pr->fmt);
468 (void)printf("\n");
470 #endif
/*
 * Translate backslash escape sequences in the string `p1' in place.
 *
 * \a, \b, \f, \n, \r, \t and \v become the corresponding control
 * character; a backslash followed by any other character yields that
 * character.  A lone backslash at the end of the string is kept as is.
 * The result is never longer than the input.
 */
void
escape(char *p1)
{
	char *src, *dst;

	for (src = dst = p1; *src != '\0'; ++src, ++dst) {
		if (*src != '\\') {
			*dst = *src;
			continue;
		}
		++src;
		switch (*src) {
		case '\0':
			/* incomplete escape sequence: keep the backslash */
			dst[0] = '\\';
			dst[1] = '\0';
			return;
		case 'a':
			*dst = '\007';	/* alert */
			break;
		case 'b':
			*dst = '\b';
			break;
		case 'f':
			*dst = '\f';
			break;
		case 'n':
			*dst = '\n';
			break;
		case 'r':
			*dst = '\r';
			break;
		case 't':
			*dst = '\t';
			break;
		case 'v':
			*dst = '\v';
			break;
		default:
			*dst = *src;
			break;
		}
	}
	*dst = '\0';
}
/*
 * Report that the conversion `s' was given an unsupported byte count
 * and exit with status 1.
 */
void
badcnt(char *s)
{
	errx(1, "%s: bad byte count", s);
}
/*
 * Report a %s conversion that has neither a precision nor a byte count
 * and exit with status 1.
 */
void
badsfmt(void)
{
	errx(1, "%%s: requires a precision or a byte count");
}
/*
 * Report the malformed format string `fmt' and exit with status 1.
 */
void
badfmt(const char *fmt)
{
	errx(1, "\"%s\": bad format", fmt);
}
/*
 * Report the unknown conversion `ch' (the text following the '%') and
 * exit with status 1.
 */
void
badconv(char *ch)
{
	errx(1, "%%%s: bad conversion character", ch);
}