1 /* $NetBSD: checknr.c,v 1.24 2013/08/12 14:03:18 joerg Exp $ */
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\
35 The Regents of the University of California. All rights reserved.");
40 static char sccsid
[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
42 __RCSID("$NetBSD: checknr.c,v 1.24 2013/08/12 14:03:18 joerg Exp $");
47 * checknr: check an nroff/troff input file for matching macro calls.
48 * we also attempt to match size and font changes, but only the embedded
49 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
50 * later but for now think of these restrictions as contributions to
51 * structured typesetting.
59 #define MAXSTK 100 /* Stack size */
60 #define MAXBR 100 /* Max number of bracket pairs known */
61 #define MAXCMDS 500 /* Max number of commands known */
64 * The stack on which we remember what we've seen so far.
66 static struct stkstr
{
67 int opno
; /* number of opening bracket */
68 int pl
; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
69 int parm
; /* parm to size, font, etc */
70 int lno
; /* line number the thing came in in */
75 * The kinds of opening and closing brackets.
81 /* A few bare bones troff commands */
83 { "sz", "sz"}, /* also \s */
85 { "ft", "ft"}, /* also \f */
115 /* The -me package */
124 /* The -mdoc package */
137 /* Things needed by preprocessors */
146 * All commands known to nroff, plus macro packages.
147 * Used so we can complain about unrecognized commands.
149 static const char *knowncmds
[MAXCMDS
] = {
150 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N",
151 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q",
152 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x",
153 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D",
154 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p",
155 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT",
156 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" , "B1",
157 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf",
158 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT",
159 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc",
160 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM",
161 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er",
162 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ",
163 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx",
164 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM",
165 "IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF", "KQ", "KS", "LB",
166 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF",
167 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd",
168 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op",
169 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY",
170 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql",
171 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT",
172 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM",
173 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
174 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ",
175 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
176 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo",
177 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>",
178 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am",
179 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx",
180 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de",
181 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el",
182 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft",
183 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie",
184 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
185 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
186 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr",
187 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn",
188 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro",
189 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st",
190 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u",
191 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0
194 static int lineno
; /* current input line number; the read loop starts it at 1 and it is copied into stack entries */
195 static const char *cfilename
; /* name of current file: handed to fopen() and printed as a diagnostic prefix */
196 static int nfiles
; /* number of files to process */
197 static int fflag
; /* -f: ignore \f (font changes) */
198 static int sflag
; /* -s: ignore \s (size changes) */
199 static int ncmds
; /* number of entries in knowncmds; no more are accepted once it reaches MAXCMDS */
200 static int slot
; /* set by binsrch as a side effect: the place in knowncmds where the name would have gone */
/* Forward declarations of the file-local helpers. */
/* NOTE(review): presumably handles a ".de xx" line by adding xx to knowncmds -- confirm against the definition. */
202 static void addcmd(char *);
/* Adds a command name to knowncmds at the slot chosen by binsrch; redefining a known name is allowed. */
203 static void addmac(const char *);
/* Binary search of knowncmds: returns the index, or -1 if absent; sets slot as a side effect. */
204 static int binsrch(const char *);
/* Looks the name up with binsrch and can report "Unknown command"; also tests for the comment form. */
205 static void checkknown(const char *);
/* Matches a command against the bracket stack: pops on the expected closer, pushes on an opener. */
206 static void chkcmd(const char *);
/* NOTE(review): presumably prints the "Unmatched ..." report for one stack entry -- confirm. */
207 static void complain(int);
/* Nonzero when the two strings compare equal under strcmp(). */
208 static int eq(const char *, const char *);
/* Handles a closer that is not on top of the stack: searches further down, else prints "Unmatched .xx". */
209 static void nomatch(const char *);
/* NOTE(review): presumably scans one opened input file line by line -- confirm. */
211 static void process(FILE *);
/* NOTE(review): presumably prints the opener recorded in one stack entry -- confirm. */
212 static void prop(int);
/* Prints the usage message; NOTE(review): __dead presumably marks it as not returning -- confirm. */
213 static void usage(void) __dead
;
216 main(int argc
, char **argv
)
223 /* Figure out how many known commands there are */
224 while (knowncmds
[ncmds
])
226 while (argc
> 1 && argv
[1][0] == '-') {
229 /* -a: add pairs of macros */
231 i
= strlen(argv
[1]) - 2;
234 /* look for empty macro slots */
235 for (i
=0; br
[i
].opbr
; i
++)
237 for (cp
=argv
[1]+3; cp
[-1]; cp
+= 6) {
241 errx(1, "too many pairs");
242 if ((tmp
= malloc(3)) == NULL
)
246 if ((tmp
= malloc(3)) == NULL
)
248 strlcpy(tmp
, cp
+3, 3);
250 addmac(br
[i
].opbr
); /* knows pairs are also known cmds */
256 /* -c: add known commands */
258 i
= strlen(argv
[1]) - 2;
261 for (cp
=argv
[1]+3; cp
[-1]; cp
+= 3) {
262 if (cp
[2] && cp
[2] != '.')
269 /* -f: ignore font changes */
274 /* -s: ignore size changes */
287 for (i
=1; i
<argc
; i
++) {
289 f
= fopen(cfilename
, "r");
307 (void)fprintf(stderr
,
308 "usage: %s [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] file\n",
317 char line
[256]; /* the current line */
318 char mac
[5]; /* The current macro or nroff command */
322 for (lineno
= 1; fgets(line
, sizeof line
, f
); lineno
++) {
323 if (line
[0] == '.') {
325 * find and isolate the macro/command name.
327 strncpy(mac
, line
+1, 4);
328 if (isspace((unsigned char)mac
[0])) {
330 printf("Empty command\n");
331 } else if (isspace((unsigned char)mac
[1])) {
333 } else if (isspace((unsigned char)mac
[2])) {
335 } else if (mac
[0] != '\\' || mac
[1] != '\"') {
337 printf("Command too long\n");
341 * Is it a known command?
355 * At this point we process the line looking
358 for (i
=0; line
[i
]; i
++)
359 if (line
[i
]=='\\' && (i
==0 || line
[i
-1]!='\\')) {
360 if (!sflag
&& line
[++i
]=='s') {
362 if (isdigit((unsigned char)pl
)) {
367 while (isdigit((unsigned char)line
[++i
]))
368 n
= 10 * n
+ line
[i
] - '0';
372 stk
[stktop
].opno
== SZ
) {
376 printf("unmatched \\s0\n");
379 stk
[++stktop
].opno
= SZ
;
381 stk
[stktop
].parm
= n
;
382 stk
[stktop
].lno
= lineno
;
384 } else if (!fflag
&& line
[i
]=='f') {
388 stk
[stktop
].opno
== FT
) {
392 printf("unmatched \\fP\n");
395 stk
[++stktop
].opno
= FT
;
397 stk
[stktop
].parm
= n
;
398 stk
[stktop
].lno
= lineno
;
404 * We've hit the end and look at all this stuff that hasn't been
405 * matched yet! Complain, complain.
407 for (i
=stktop
; i
>=0; i
--) {
416 printf("Unmatched ");
425 printf(".%s", br
[stk
[i
].opno
].opbr
);
426 else switch(stk
[i
].opno
) {
428 printf("\\s%c%d", stk
[i
].pl
, stk
[i
].parm
);
431 printf("\\f%c", stk
[i
].parm
);
434 printf("Bug: stk[%d].opno = %d = .%s, .%s",
435 i
, stk
[i
].opno
, br
[stk
[i
].opno
].opbr
,
436 br
[stk
[i
].opno
].clbr
);
441 chkcmd(const char *mac
)
446 * Check to see if it matches top of stack.
448 if (stktop
>= 0 && eq(mac
, br
[stk
[stktop
].opno
].clbr
))
449 stktop
--; /* OK. Pop & forget */
451 /* No. Maybe it's an opener */
452 for (i
=0; br
[i
].opbr
; i
++) {
453 if (eq(mac
, br
[i
].opbr
)) {
454 /* Found. Push it. */
456 stk
[stktop
].opno
= i
;
458 stk
[stktop
].parm
= 0;
459 stk
[stktop
].lno
= lineno
;
463 * Maybe it's an unmatched closer.
464 * NOTE: this depends on the fact
465 * that none of the closers can be
468 if (eq(mac
, br
[i
].clbr
)) {
477 nomatch(const char *mac
)
482 * Look for a match further down on stack
483 * If we find one, it suggests that the stuff in
484 * between is supposed to match itself.
486 for (j
=stktop
; j
>=0; j
--)
487 if (eq(mac
,br
[stk
[j
].opno
].clbr
)) {
488 /* Found. Make a good diagnostic. */
491 * Check for special case \fx..\fR and don't
494 if (stk
[j
+1].opno
==FT
&& stk
[j
+1].parm
!='R'
495 && stk
[j
+2].opno
==FT
&& stk
[j
+2].parm
=='R') {
500 * We have two unmatched frobs. Chances are
501 * they were intended to match, so we mention
506 printf(" does not match %d: ", stk
[j
+2].lno
);
509 } else for (i
=j
+1; i
<= stktop
; i
++) {
515 /* Didn't find one. Throw this away. */
517 printf("Unmatched .%s\n", mac
);
/*
 * eq: report whether two NUL-terminated strings are identical.
 *
 * s1, s2: the strings to compare.  Both are handed straight to strcmp(),
 *         so neither may be NULL.
 *
 * Returns 1 when strcmp() finds no difference, 0 otherwise.
 */
static int
eq(const char *s1, const char *s2)
{
	return strcmp(s1, s2) == 0;
}
527 /* print the first part of an error message, given the line number */
532 printf("%s: ", cfilename
);
533 printf("%d: ", pelineno
);
537 checkknown(const char *mac
)
542 if (binsrch(mac
) >= 0)
544 if (mac
[0] == '\\' && mac
[1] == '"') /* comments */
548 printf("Unknown command: .%s\n", mac
);
552 * We have a .de xx line in "line". Add xx to the list of known commands.
559 /* grab the macro being defined */
561 while (isspace((unsigned char)*mac
))
565 printf("illegal define: %s\n", line
);
569 if (isspace((unsigned char)mac
[1]) || mac
[1] == '\\')
571 if (ncmds
>= MAXCMDS
) {
572 printf("Only %d known commands allowed\n", MAXCMDS
);
579 * Add mac to the list. We should really have some kind of tree
580 * structure here but this is a quick-and-dirty job and I just don't
581 * have time to mess with it. (I wonder if this will come back to haunt
582 * me someday?) Anyway, I claim that .de is fairly rare in user
583 * nroff programs, and the register loop below is pretty fast.
586 addmac(const char *mac
)
588 const char **src
, **dest
, **loc
;
590 if (binsrch(mac
) >= 0){ /* it's OK to redefine something */
592 printf("binsrch(%s) -> already in table\n", mac
);
596 /* binsrch sets slot as a side effect */
598 printf("binsrch(%s) -> %d\n", mac
, slot
);
600 loc
= &knowncmds
[slot
];
601 src
= &knowncmds
[ncmds
-1];
605 if ((*loc
= strdup(mac
)) == NULL
)
609 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds
[slot
-2],
610 knowncmds
[slot
-1], knowncmds
[slot
], knowncmds
[slot
+1],
611 knowncmds
[slot
+2], ncmds
);
616 * Do a binary search in knowncmds for mac.
617 * If found, return the index. If not, return -1.
620 binsrch(const char *mac
)
622 const char *p
; /* pointer to current cmd in list */
623 int d
; /* difference if any */
624 int mid
; /* mid point in binary search */
625 int top
, bot
; /* boundaries of bin search, inclusive */
642 slot
= bot
; /* place it would have gone */