1 /* $NetBSD: checknr.c,v 1.19 2006/04/25 19:25:19 christos Exp $ */
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\
35 The Regents of the University of California. All rights reserved.");
40 static char sccsid
[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
42 __RCSID("$NetBSD: checknr.c,v 1.19 2006/04/25 19:25:19 christos Exp $");
47 * checknr: check an nroff/troff input file for matching macro calls.
48 * we also attempt to match size and font changes, but only the embedded
49 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
50 * later but for now think of these restrictions as contributions to
51 * structured typesetting.
59 #define MAXSTK 100 /* Stack size */
60 #define MAXBR 100 /* Max number of bracket pairs known */
61 #define MAXCMDS 500 /* Max number of commands known */
64 * The stack on which we remember what we've seen so far.
67 int opno
; /* number of opening bracket */
68 int pl
; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
69 int parm
; /* parm to size, font, etc */
70 int lno
; /* line number the thing came in in */
75 * The kinds of opening and closing brackets.
81 /* A few bare bones troff commands */
83 { "sz", "sz"}, /* also \s */
85 { "ft", "ft"}, /* also \f */
115 /* The -me package */
124 /* The -mdoc package */
137 /* Things needed by preprocessors */
146 * All commands known to nroff, plus macro packages.
147 * Used so we can complain about unrecognized commands.
149 char *knowncmds
[MAXCMDS
] = {
150 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N",
151 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q",
152 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x",
153 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D",
154 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p",
155 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT",
156 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" , "B1",
157 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf",
158 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT",
159 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc",
160 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM",
161 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er",
162 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ",
163 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx",
164 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM",
165 "IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF", "KQ", "KS", "LB",
166 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF",
167 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd",
168 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op",
169 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY",
170 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql",
171 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT",
172 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM",
173 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
174 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ",
175 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
176 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo",
177 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>",
178 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am",
179 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx",
180 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de",
181 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el",
182 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft",
183 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie",
184 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
185 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
186 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr",
187 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn",
188 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro",
189 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st",
190 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u",
191 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0
194 int lineno
; /* current line number in input file */
195 char *cfilename
; /* name of current file */
196 int nfiles
; /* number of files to process */
197 int fflag
; /* -f: ignore \f */
198 int sflag
; /* -s: ignore \s */
199 int ncmds
; /* size of knowncmds */
200 int slot
; /* slot in knowncmds found by binsrch */
205 void checkknown(char *);
206 void chkcmd(char *, char *);
208 int eq(const void *, const void *);
209 int main(int, char **);
210 void nomatch(char *);
212 void process(FILE *);
217 main(int argc
, char **argv
)
224 /* Figure out how many known commands there are */
225 while (knowncmds
[ncmds
])
227 while (argc
> 1 && argv
[1][0] == '-') {
230 /* -a: add pairs of macros */
232 i
= strlen(argv
[1]) - 2;
235 /* look for empty macro slots */
236 for (i
=0; br
[i
].opbr
; i
++)
238 for (cp
=argv
[1]+3; cp
[-1]; cp
+= 6) {
240 errx(1, "too many pairs");
241 if ((br
[i
].opbr
= malloc(3)) == NULL
)
243 strlcpy(br
[i
].opbr
, cp
, 3);
244 if ((br
[i
].clbr
= malloc(3)) == NULL
)
246 strlcpy(br
[i
].clbr
, cp
+3, 3);
247 addmac(br
[i
].opbr
); /* knows pairs are also known cmds */
253 /* -c: add known commands */
255 i
= strlen(argv
[1]) - 2;
258 for (cp
=argv
[1]+3; cp
[-1]; cp
+= 3) {
259 if (cp
[2] && cp
[2] != '.')
266 /* -f: ignore font changes */
271 /* -s: ignore size changes */
284 for (i
=1; i
<argc
; i
++) {
286 f
= fopen(cfilename
, "r");
304 (void)fprintf(stderr
,
305 "usage: %s [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] file\n",
314 char line
[256]; /* the current line */
315 char mac
[5]; /* The current macro or nroff command */
319 for (lineno
= 1; fgets(line
, sizeof line
, f
); lineno
++) {
320 if (line
[0] == '.') {
322 * find and isolate the macro/command name.
324 strncpy(mac
, line
+1, 4);
325 if (isspace((unsigned char)mac
[0])) {
327 printf("Empty command\n");
328 } else if (isspace((unsigned char)mac
[1])) {
330 } else if (isspace((unsigned char)mac
[2])) {
332 } else if (mac
[0] != '\\' || mac
[1] != '\"') {
334 printf("Command too long\n");
338 * Is it a known command?
352 * At this point we process the line looking
355 for (i
=0; line
[i
]; i
++)
356 if (line
[i
]=='\\' && (i
==0 || line
[i
-1]!='\\')) {
357 if (!sflag
&& line
[++i
]=='s') {
359 if (isdigit((unsigned char)pl
)) {
364 while (isdigit((unsigned char)line
[++i
]))
365 n
= 10 * n
+ line
[i
] - '0';
369 stk
[stktop
].opno
== SZ
) {
373 printf("unmatched \\s0\n");
376 stk
[++stktop
].opno
= SZ
;
378 stk
[stktop
].parm
= n
;
379 stk
[stktop
].lno
= lineno
;
381 } else if (!fflag
&& line
[i
]=='f') {
385 stk
[stktop
].opno
== FT
) {
389 printf("unmatched \\fP\n");
392 stk
[++stktop
].opno
= FT
;
394 stk
[stktop
].parm
= n
;
395 stk
[stktop
].lno
= lineno
;
401 * We've hit the end and look at all this stuff that hasn't been
402 * matched yet! Complain, complain.
404 for (i
=stktop
; i
>=0; i
--) {
413 printf("Unmatched ");
422 printf(".%s", br
[stk
[i
].opno
].opbr
);
423 else switch(stk
[i
].opno
) {
425 printf("\\s%c%d", stk
[i
].pl
, stk
[i
].parm
);
428 printf("\\f%c", stk
[i
].parm
);
431 printf("Bug: stk[%d].opno = %d = .%s, .%s",
432 i
, stk
[i
].opno
, br
[stk
[i
].opno
].opbr
,
433 br
[stk
[i
].opno
].clbr
);
438 chkcmd(char *line
, char *mac
)
443 * Check to see if it matches top of stack.
445 if (stktop
>= 0 && eq(mac
, br
[stk
[stktop
].opno
].clbr
))
446 stktop
--; /* OK. Pop & forget */
448 /* No. Maybe it's an opener */
449 for (i
=0; br
[i
].opbr
; i
++) {
450 if (eq(mac
, br
[i
].opbr
)) {
451 /* Found. Push it. */
453 stk
[stktop
].opno
= i
;
455 stk
[stktop
].parm
= 0;
456 stk
[stktop
].lno
= lineno
;
460 * Maybe it's an unmatched closer.
461 * NOTE: this depends on the fact
462 * that none of the closers can be
465 if (eq(mac
, br
[i
].clbr
)) {
479 * Look for a match further down on stack
480 * If we find one, it suggests that the stuff in
481 * between is supposed to match itself.
483 for (j
=stktop
; j
>=0; j
--)
484 if (eq(mac
,br
[stk
[j
].opno
].clbr
)) {
485 /* Found. Make a good diagnostic. */
488 * Check for special case \fx..\fR and don't
491 if (stk
[j
+1].opno
==FT
&& stk
[j
+1].parm
!='R'
492 && stk
[j
+2].opno
==FT
&& stk
[j
+2].parm
=='R') {
497 * We have two unmatched frobs. Chances are
498 * they were intended to match, so we mention
503 printf(" does not match %d: ", stk
[j
+2].lno
);
506 } else for (i
=j
+1; i
<= stktop
; i
++) {
512 /* Didn't find one. Throw this away. */
514 printf("Unmatched .%s\n", mac
);
517 /* eq: are two strings equal? */
519 eq(const void *s1
, const void *s2
)
521 return (strcmp((char *)s1
, (char *)s2
) == 0);
524 /* print the first part of an error message, given the line number */
529 printf("%s: ", cfilename
);
530 printf("%d: ", pelineno
);
534 checkknown(char *mac
)
539 if (binsrch(mac
) >= 0)
541 if (mac
[0] == '\\' && mac
[1] == '"') /* comments */
545 printf("Unknown command: .%s\n", mac
);
549 * We have a .de xx line in "line". Add xx to the list of known commands.
556 /* grab the macro being defined */
558 while (isspace((unsigned char)*mac
))
562 printf("illegal define: %s\n", line
);
566 if (isspace((unsigned char)mac
[1]) || mac
[1] == '\\')
568 if (ncmds
>= MAXCMDS
) {
569 printf("Only %d known commands allowed\n", MAXCMDS
);
576 * Add mac to the list. We should really have some kind of tree
577 * structure here but this is a quick-and-dirty job and I just don't
578 * have time to mess with it. (I wonder if this will come back to haunt
579 * me someday?) Anyway, I claim that .de is fairly rare in user
580 * nroff programs, and the register loop below is pretty fast.
585 char **src
, **dest
, **loc
;
587 if (binsrch(mac
) >= 0){ /* it's OK to redefine something */
589 printf("binsrch(%s) -> already in table\n", mac
);
593 /* binsrch sets slot as a side effect */
595 printf("binsrch(%s) -> %d\n", mac
, slot
);
597 loc
= &knowncmds
[slot
];
598 src
= &knowncmds
[ncmds
-1];
602 if ((*loc
= strdup(mac
)) == NULL
)
606 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds
[slot
-2],
607 knowncmds
[slot
-1], knowncmds
[slot
], knowncmds
[slot
+1],
608 knowncmds
[slot
+2], ncmds
);
613 * Do a binary search in knowncmds for mac.
614 * If found, return the index. If not, return -1.
619 char *p
; /* pointer to current cmd in list */
620 int d
; /* difference if any */
621 int mid
; /* mid point in binary search */
622 int top
, bot
; /* boundaries of bin search, inclusive */
639 slot
= bot
; /* place it would have gone */