vm: fix region reporting bug
[minix.git] / commands / hexdump / parse.c
blobcc786f9090ded05dade6c207a8aea3f5fc8b88de
1 /* $NetBSD: parse.c,v 1.26 2009/01/18 21:34:32 apb Exp $ */
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
36 #include <sys/cdefs.h>
37 #if 0
38 #if !defined(lint)
39 #if 0
40 static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93";
41 #else
42 __RCSID("$NetBSD: parse.c,v 1.26 2009/01/18 21:34:32 apb Exp $");
43 #endif
44 #endif /* not lint */
45 #endif
47 #include <sys/types.h>
48 #include <sys/file.h>
50 #include <ctype.h>
51 #include <err.h>
52 #include <errno.h>
53 #include <fcntl.h>
54 #include <inttypes.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <util.h>
60 #include "hexdump.h"
/*
 * Format unit to use at end-of-data.  rewrite() points this at the
 * format unit that contains a %_A conversion (and marks that unit
 * F_IGNORE); it is left NULL if no such conversion was given.
 */
FU *endfu;
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and hand
 *	each one to add().
 *
 *	Leading white space is skipped; empty lines and lines whose first
 *	non-blank character is '#' are ignored.  A line that does not fit
 *	in the line buffer (no newline found within it) is reported with
 *	warnx() and discarded in its entirety.  Note that this also applies
 *	to a final line that lacks a terminating newline.
 *
 *	Exits via err() if the file cannot be opened.
 */
void
addfile(char *name)
{
	char *p;
	FILE *fp;
	int ch;
	char buf[2048 + 1];

	if ((fp = fopen(name, "r")) == NULL)
		err(1, "fopen %s", name);
	while (fgets(buf, sizeof(buf), fp)) {
		if (!(p = strchr(buf, '\n'))) {
			warnx("line too long.");
			/*
			 * Throw away the remainder of the over-long line.
			 * This must read from the format file itself, not
			 * from stdin, which carries the data to be dumped.
			 */
			while ((ch = getc(fp)) != '\n' && ch != EOF)
				continue;
			continue;
		}
		*p = '\0';
		/* skip leading white space */
		for (p = buf; *p && isspace((unsigned char)*p); ++p)
			continue;
		/* ignore blank lines and comments */
		if (!*p || *p == '#')
			continue;
		add(p);
	}
	(void)fclose(fp);
}
89 void
90 add(const char *fmt)
92 const char *p;
93 static FS **nextfs;
94 FS *tfs;
95 FU *tfu, **nextfu;
96 const char *savep;
98 /* start new linked list of format units */
99 tfs = ecalloc(1, sizeof(FS));
100 if (!fshead)
101 fshead = tfs;
102 else
103 *nextfs = tfs;
104 nextfs = &tfs->nextfs;
105 nextfu = &tfs->nextfu;
107 /* take the format string and break it up into format units */
108 for (p = fmt;;) {
109 /* skip leading white space */
110 for (; isspace((unsigned char)*p); ++p);
111 if (!*p)
112 break;
114 /* allocate a new format unit and link it in */
115 tfu = ecalloc(1, sizeof(FU));
116 *nextfu = tfu;
117 nextfu = &tfu->nextfu;
118 tfu->reps = 1;
120 /* if leading digit, repetition count */
121 if (isdigit((unsigned char)*p)) {
122 for (savep = p; isdigit((unsigned char)*p); ++p);
123 if (!isspace((unsigned char)*p) && *p != '/')
124 badfmt(fmt);
125 /* may overwrite either white space or slash */
126 tfu->reps = atoi(savep);
127 tfu->flags = F_SETREP;
128 /* skip trailing white space */
129 for (++p; isspace((unsigned char)*p); ++p);
132 /* skip slash and trailing white space */
133 if (*p == '/')
134 while (isspace((unsigned char)*++p));
136 /* byte count */
137 if (isdigit((unsigned char)*p)) {
138 for (savep = p; isdigit((unsigned char)*p); ++p);
139 if (!isspace((unsigned char)*p))
140 badfmt(fmt);
141 tfu->bcnt = atoi(savep);
142 /* skip trailing white space */
143 for (++p; isspace((unsigned char)*p); ++p);
146 /* format */
147 if (*p != '"')
148 badfmt(fmt);
149 for (savep = ++p; *p != '"';)
150 if (*p++ == 0)
151 badfmt(fmt);
152 tfu->fmt = emalloc(p - savep + 1);
153 (void) strncpy(tfu->fmt, savep, p - savep);
154 tfu->fmt[p - savep] = '\0';
155 escape(tfu->fmt);
156 p++;
/*
 * Characters that may appear between the '%' and the conversion
 * character of a format: precision dot, flags, and digits.  Scanning
 * with `spec + 1' leaves out the leading '.', so such a scan stops at
 * the precision and lets the caller pick it up.
 */
static const char *spec = ".#-+ 0123456789";
163 size(FS *fs)
165 FU *fu;
166 int bcnt, cursize;
167 char *fmt;
168 int prec;
170 /* figure out the data block size needed for each format unit */
171 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
172 if (fu->bcnt) {
173 cursize += fu->bcnt * fu->reps;
174 continue;
176 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
177 if (*fmt != '%')
178 continue;
180 * skip any special chars -- save precision in
181 * case it's a %s format.
183 while (strchr(spec + 1, *++fmt));
184 if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
185 prec = atoi(fmt);
186 while (isdigit((unsigned char)*++fmt));
188 switch(*fmt) {
189 case 'c':
190 bcnt += 1;
191 break;
192 case 'd': case 'i': case 'o': case 'u':
193 case 'x': case 'X':
194 bcnt += 4;
195 break;
196 case 'e': case 'E': case 'f': case 'g': case 'G':
197 bcnt += 8;
198 break;
199 case 's':
200 bcnt += prec;
201 break;
202 case '_':
203 switch(*++fmt) {
204 case 'c': case 'p': case 'u':
205 bcnt += 1;
206 break;
210 cursize += bcnt * fu->reps;
212 return (cursize);
215 void
216 rewrite(FS *fs)
218 enum { NOTOKAY, USEBCNT, USEPREC } sokay;
219 PR *pr, **nextpr;
220 FU *fu;
221 char *p1, *p2;
222 char savech, *fmtp, cs[sizeof(PRId64)];
223 int nconv, prec;
225 prec = 0;
226 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
228 * Break each format unit into print units; each conversion
229 * character gets its own.
231 nextpr = &fu->nextpr;
232 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
233 pr = ecalloc(1, sizeof(*pr));
234 *nextpr = pr;
236 /* Skip preceding text and up to the next % sign. */
237 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
239 /* Only text in the string. */
240 if (!*p1) {
241 pr->fmt = fmtp;
242 pr->flags = F_TEXT;
243 break;
247 * Get precision for %s -- if have a byte count, don't
248 * need it.
250 if (fu->bcnt) {
251 sokay = USEBCNT;
252 /* Skip to conversion character. */
253 for (++p1; *p1 && strchr(spec, *p1); ++p1);
254 } else {
255 /* Skip any special chars, field width. */
256 while (*++p1 && strchr(spec + 1, *p1));
257 if (*p1 == '.' &&
258 isdigit((unsigned char)*++p1)) {
259 sokay = USEPREC;
260 prec = atoi(p1);
261 while (isdigit((unsigned char)*++p1))
262 continue;
263 } else
264 sokay = NOTOKAY;
267 p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */
268 cs[0] = *p1; /* Set conversion string. */
269 cs[1] = '\0';
272 * Figure out the byte count for each conversion;
273 * rewrite the format as necessary, set up blank-
274 * padding for end of data.
276 switch(cs[0]) {
277 case 'c':
278 pr->flags = F_CHAR;
279 switch(fu->bcnt) {
280 case 0: case 1:
281 pr->bcnt = 1;
282 break;
283 default:
284 p1[1] = '\0';
285 badcnt(p1);
287 break;
288 case 'd': case 'i':
289 pr->flags = F_INT;
290 goto isint;
291 case 'o': case 'u': case 'x': case 'X':
292 pr->flags = F_UINT;
293 isint:
295 * Regardless of pr->bcnt, all integer
296 * values are cast to [u]int64_t before
297 * being printed by display(). We
298 * therefore need to use PRI?64 as the
299 * format, where '?' could actually
300 * be any of [diouxX]. We make the
301 * assumption (not guaranteed by the
302 * C99 standard) that we can derive
303 * all the other PRI?64 values from
304 * PRId64 simply by changing the last
305 * character. For example, if PRId64 is
306 * "lld" or "qd", and cs[0] is 'o', then
307 * we end up with "llo" or "qo".
309 savech = cs[0];
310 strncpy(cs, PRId64, sizeof(PRId64) - 2);
311 cs[sizeof(PRId64) - 2] = savech;
312 cs[sizeof(PRId64) - 1] = '\0';
313 switch(fu->bcnt) {
314 case 0: case 4:
315 pr->bcnt = 4;
316 break;
317 case 1:
318 pr->bcnt = 1;
319 break;
320 case 2:
321 pr->bcnt = 2;
322 break;
323 case 8:
324 pr->bcnt = 8;
325 break;
326 default:
327 p1[1] = '\0';
328 badcnt(p1);
330 break;
331 case 'e': case 'E': case 'f': case 'g': case 'G':
332 pr->flags = F_DBL;
333 switch(fu->bcnt) {
334 case 0: case 8:
335 pr->bcnt = 8;
336 break;
337 case 4:
338 pr->bcnt = 4;
339 break;
340 default:
341 p1[1] = '\0';
342 badcnt(p1);
344 break;
345 case 's':
346 pr->flags = F_STR;
347 switch(sokay) {
348 case NOTOKAY:
349 badsfmt();
350 case USEBCNT:
351 pr->bcnt = fu->bcnt;
352 break;
353 case USEPREC:
354 pr->bcnt = prec;
355 break;
357 break;
358 case '_':
359 ++p2;
360 switch(p1[1]) {
361 case 'A':
362 endfu = fu;
363 fu->flags |= F_IGNORE;
364 /* FALLTHROUGH */
365 case 'a':
366 pr->flags = F_ADDRESS;
367 ++p2;
368 switch(p1[2]) {
369 case 'd': case 'o': case'x':
371 * See comments above for
372 * the way we use PRId64.
374 strncpy(cs, PRId64,
375 sizeof(PRId64) - 2);
376 cs[sizeof(PRId64) - 2] = p1[2];
377 cs[sizeof(PRId64) - 1] = '\0';
378 break;
379 default:
380 p1[3] = '\0';
381 badconv(p1);
383 break;
384 case 'c':
385 pr->flags = F_C;
386 /* cs[0] = 'c'; set in conv_c */
387 goto isint2;
388 case 'p':
389 pr->flags = F_P;
390 cs[0] = 'c';
391 goto isint2;
392 case 'u':
393 pr->flags = F_U;
394 /* cs[0] = 'c'; set in conv_u */
395 isint2: switch(fu->bcnt) {
396 case 0: case 1:
397 pr->bcnt = 1;
398 break;
399 default:
400 p1[2] = '\0';
401 badcnt(p1);
403 break;
404 default:
405 p1[2] = '\0';
406 badconv(p1);
408 break;
409 default:
410 p1[1] = '\0';
411 badconv(p1);
415 * Copy to PR format string, set conversion character
416 * pointer, update original.
418 savech = *p2;
419 p1[0] = '\0';
420 pr->fmt = emalloc(strlen(fmtp) + strlen(cs) + 1);
421 (void)strcpy(pr->fmt, fmtp);
422 (void)strcat(pr->fmt, cs);
423 *p2 = savech;
424 pr->cchar = pr->fmt + (p1 - fmtp);
425 fmtp = p2;
427 /* Only one conversion character if byte count. */
428 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
429 errx(1,
430 "byte count with multiple conversion characters");
433 * If format unit byte count not specified, figure it out
434 * so can adjust rep count later.
436 if (!fu->bcnt)
437 for (pr = fu->nextpr; pr; pr = pr->nextpr)
438 fu->bcnt += pr->bcnt;
441 * If the format string interprets any data at all, and it's
442 * not the same as the blocksize, and its last format unit
443 * interprets any data at all, and has no iteration count,
444 * repeat it as necessary.
446 * If, rep count is greater than 1, no trailing whitespace
447 * gets output from the last iteration of the format unit.
449 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
450 if (!fu->nextfu && fs->bcnt < blocksize &&
451 !(fu->flags&F_SETREP) && fu->bcnt)
452 fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
453 if (fu->reps > 1) {
454 if (!fu->nextpr)
455 break;
456 for (pr = fu->nextpr;; pr = pr->nextpr)
457 if (!pr->nextpr)
458 break;
459 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
460 p2 = isspace((unsigned char)*p1) ? p1 : NULL;
461 if (p2)
462 pr->nospace = p2;
465 #ifdef DEBUG
466 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
467 (void)printf("fmt:");
468 for (pr = fu->nextpr; pr; pr = pr->nextpr)
469 (void)printf(" {%s}", pr->fmt);
470 (void)printf("\n");
472 #endif
/*
 * escape --
 *	Expand backslash escape sequences in the string `p1', in place.
 *
 *	\a \b \f \n \r \t \v become the corresponding control characters;
 *	a backslash followed by any other character yields that character.
 *	A lone backslash at the very end of the string is kept as is.
 *	The result is never longer than the input.
 */
void
escape(char *p1)
{
	char *out;	/* write position; never ahead of the read position */
	char c;

	out = p1;
	for (;;) {
		c = *p1++;
		if (c == '\0')
			break;
		if (c != '\\') {
			*out++ = c;
			continue;
		}

		/* Backslash: look at the character it introduces. */
		c = *p1++;
		switch (c) {
		case '\0':
			/* incomplete escape sequence: keep the backslash */
			*out++ = '\\';
			*out = '\0';
			return;
		case 'a':
			c = '\007';	/* '\a' */
			break;
		case 'b':
			c = '\b';
			break;
		case 'f':
			c = '\f';
			break;
		case 'n':
			c = '\n';
			break;
		case 'r':
			c = '\r';
			break;
		case 't':
			c = '\t';
			break;
		case 'v':
			c = '\v';
			break;
		default:
			/* unknown escape: the character stands for itself */
			break;
		}
		*out++ = c;
	}
	*out = '\0';
}
/*
 * badcnt --
 *	Report that the conversion `s' was given a byte count it cannot
 *	be used with, and exit with status 1.  Does not return.
 */
void
badcnt(char *s)
{

	errx(1, "%s: bad byte count", s);
}
/*
 * badsfmt --
 *	Report a %s conversion that has neither a precision nor a byte
 *	count, and exit with status 1.  Does not return.
 */
void
badsfmt(void)
{

	errx(1, "%%s: requires a precision or a byte count");
}
/*
 * badfmt --
 *	Report that the format string `fmt' is malformed, and exit with
 *	status 1.  Does not return.
 */
void
badfmt(const char *fmt)
{

	errx(1, "\"%s\": bad format", fmt);
}
/*
 * badconv --
 *	Report an unknown conversion; `ch' is the text following the '%'.
 *	Exits with status 1.  Does not return.
 */
void
badconv(char *ch)
{

	errx(1, "%%%s: bad conversion character", ch);
}