1 /* $Vendor-Id: main.c,v 1.135 2011/01/04 15:02:00 kristaps Exp $ */
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
46 #define REPARSE_LIMIT 1000
47 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
49 /* FIXME: Intel's compiler? LLVM? pcc? */
51 #if !defined(__GNUC__) || (__GNUC__ < 2)
53 # define __attribute__(x)
55 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
57 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
58 typedef void (*out_man
)(void *, const struct man
*);
59 typedef void (*out_free
)(void *);
83 const char *file
; /* Current parse. */
84 int fd
; /* Current parse. */
85 int line
; /* Line number in the file. */
86 enum mandoclevel wlevel
; /* Ignore messages below this. */
87 int wstop
; /* Stop after a file with a warning. */
88 enum intt inttype
; /* which parser to use */
89 struct man
*pman
; /* persistent man parser */
90 struct mdoc
*pmdoc
; /* persistent mdoc parser */
91 struct man
*man
; /* man parser */
92 struct mdoc
*mdoc
; /* mdoc parser */
93 struct roff
*roff
; /* roff parser (!NULL) */
94 struct regset regs
; /* roff registers */
95 int reparse_count
; /* finite interpolation stack */
96 enum outt outtype
; /* which output to use */
97 out_mdoc outmdoc
; /* mdoc output ptr */
98 out_man outman
; /* man output ptr */
99 out_free outfree
; /* free output ptr */
100 void *outdata
; /* data for output */
101 char outopts
[BUFSIZ
]; /* buf of output opts */
104 static const char * const mandoclevels
[MANDOCLEVEL_MAX
] = {
114 static const enum mandocerr mandoclimits
[MANDOCLEVEL_MAX
] = {
124 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
129 /* related to the prologue */
130 "no title in document",
131 "document title should be all caps",
132 "unknown manual section",
133 "cannot parse date argument",
134 "prologue macros out of order",
135 "duplicate prologue macro",
136 "macro not allowed in prologue",
137 "macro not allowed in body",
139 /* related to document structure */
140 ".so is fragile, better use ln(1)",
141 "NAME section must come first",
142 "bad NAME section contents",
143 "manual name not yet set",
144 "sections out of conventional order",
145 "duplicate section name",
146 "section not in conventional manual section",
148 /* related to macros and nesting */
149 "skipping obsolete macro",
150 "skipping paragraph macro",
151 "blocks badly nested",
152 "child violates parent syntax",
153 "nested displays are not portable",
154 "already in literal mode",
156 /* related to missing macro arguments */
157 "skipping empty macro",
158 "argument count wrong",
159 "missing display type",
160 "list type must come first",
161 "tag lists require a width argument",
164 /* related to bad macro arguments */
166 "duplicate argument",
167 "duplicate display type",
168 "duplicate list type",
169 "unknown AT&T UNIX version",
172 "unknown standard specifier",
173 "bad width argument",
175 /* related to plain text */
176 "blank line in non-literal context",
177 "tab in non-literal context",
178 "end of line whitespace",
180 "unknown escape sequence",
181 "unterminated quoted string",
183 /* related to tables */
188 /* related to tables */
192 "no table layout cells specified",
193 "no table data cells specified",
194 "ignore data in cell",
195 "data block still open",
197 "input stack limit exceeded, infinite loop?",
198 "skipping bad character",
199 "skipping text before the first section header",
200 "skipping unknown macro",
201 "NOT IMPLEMENTED: skipping request",
203 "argument count wrong",
204 "skipping end of block that is not open",
205 "missing end of block",
206 "scope open on exit",
207 "uname(3) system call failed",
208 "macro requires line argument(s)",
209 "macro requires body argument(s)",
210 "macro requires argument(s)",
212 "line argument(s) will be lost",
213 "body argument(s) will be lost",
215 "generic fatal error",
217 "column syntax is inconsistent",
218 "NOT IMPLEMENTED: .Bd -file",
219 "line scope broken, syntax violated",
220 "argument count wrong, violates syntax",
221 "child violates parent syntax",
222 "argument count wrong, violates syntax",
223 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
225 "no document prologue",
226 "static buffer exhausted",
229 static void parsebuf(struct curparse
*, struct buf
, int);
230 static void pdesc(struct curparse
*);
231 static void fdesc(struct curparse
*);
232 static void ffile(const char *, struct curparse
*);
233 static int pfile(const char *, struct curparse
*);
234 static int moptions(enum intt
*, char *);
235 static int mmsg(enum mandocerr
, void *,
236 int, int, const char *);
237 static void pset(const char *, int, struct curparse
*);
238 static int toptions(struct curparse
*, char *);
239 static void usage(void) __attribute__((noreturn
));
240 static void version(void) __attribute__((noreturn
));
241 static int woptions(struct curparse
*, char *);
243 static const char *progname
;
244 static enum mandoclevel file_status
= MANDOCLEVEL_OK
;
245 static enum mandoclevel exit_status
= MANDOCLEVEL_OK
;
248 main(int argc
, char *argv
[])
251 struct curparse curp
;
253 progname
= strrchr(argv
[0], '/');
254 if (progname
== NULL
)
259 memset(&curp
, 0, sizeof(struct curparse
));
261 curp
.inttype
= INTT_AUTO
;
262 curp
.outtype
= OUTT_ASCII
;
263 curp
.wlevel
= MANDOCLEVEL_FATAL
;
266 while (-1 != (c
= getopt(argc
, argv
, "m:O:T:VW:")))
269 if ( ! moptions(&curp
.inttype
, optarg
))
270 return((int)MANDOCLEVEL_BADARG
);
273 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
274 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
277 if ( ! toptions(&curp
, optarg
))
278 return((int)MANDOCLEVEL_BADARG
);
281 if ( ! woptions(&curp
, optarg
))
282 return((int)MANDOCLEVEL_BADARG
);
296 curp
.file
= "<stdin>";
297 curp
.fd
= STDIN_FILENO
;
304 if (MANDOCLEVEL_OK
!= exit_status
&& curp
.wstop
)
310 (*curp
.outfree
)(curp
.outdata
);
312 mdoc_free(curp
.pmdoc
);
316 roff_free(curp
.roff
);
318 return((int)exit_status
);
326 (void)printf("%s %s\n", progname
, VERSION
);
327 exit((int)MANDOCLEVEL_OK
);
335 (void)fprintf(stderr
, "usage: %s "
345 exit((int)MANDOCLEVEL_BADARG
);
349 ffile(const char *file
, struct curparse
*curp
)
353 * Called once per input file. Get the file ready for reading,
354 * pass it through to the parser-driver, then close it out.
355 * XXX: don't do anything special as this is only called for
356 * files; stdin goes directly to fdesc().
361 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
363 exit_status
= MANDOCLEVEL_SYSERR
;
369 if (-1 == close(curp
->fd
))
374 pfile(const char *file
, struct curparse
*curp
)
376 const char *savefile
;
379 if (-1 == (fd
= open(file
, O_RDONLY
, 0))) {
381 file_status
= MANDOCLEVEL_SYSERR
;
385 savefile
= curp
->file
;
393 curp
->file
= savefile
;
399 return(MANDOCLEVEL_FATAL
> file_status
? 1 : 0);
404 resize_buf(struct buf
*buf
, size_t initial
)
407 buf
->sz
= buf
->sz
> initial
/2 ? 2 * buf
->sz
: initial
;
408 buf
->buf
= realloc(buf
->buf
, buf
->sz
);
409 if (NULL
== buf
->buf
) {
411 exit((int)MANDOCLEVEL_SYSERR
);
417 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
423 if (-1 == fstat(curp
->fd
, &st
)) {
429 * If we're a regular file, try just reading in the whole entry
430 * via mmap(). This is faster than reading it into blocks, and
431 * since each file is only a few bytes to begin with, I'm not
432 * concerned that this is going to tank any machines.
436 if (S_ISREG(st
.st_mode
)) {
437 if (st
.st_size
>= (1U << 31)) {
438 fprintf(stderr
, "%s: input too large\n",
443 fb
->sz
= (size_t)st
.st_size
;
444 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
445 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
446 if (fb
->buf
!= MAP_FAILED
)
452 * If this isn't a regular file (like, say, stdin), then we must
453 * go the old way and just read things in bit by bit.
462 if (fb
->sz
== (1U << 31)) {
463 fprintf(stderr
, "%s: input too large\n",
467 resize_buf(fb
, 65536);
469 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
488 fdesc(struct curparse
*curp
)
492 * Called once per file with an opened file descriptor. All
493 * pre-file-parse operations (whether stdin or a file) should go
496 * This calls down into the nested parser, which drills down and
497 * fully parses a file and all its dependences (i.e., `so'). It
498 * then runs the cleanup validators and pushes to output.
501 /* Zero the parse type. */
505 file_status
= MANDOCLEVEL_OK
;
507 /* Make sure the mandatory roff parser is initialised. */
509 if (NULL
== curp
->roff
) {
510 curp
->roff
= roff_alloc(&curp
->regs
, curp
, mmsg
);
514 /* Fully parse the file. */
518 if (MANDOCLEVEL_FATAL
<= file_status
)
521 /* NOTE a parser may not have been assigned, yet. */
523 if ( ! (curp
->man
|| curp
->mdoc
)) {
524 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
525 file_status
= MANDOCLEVEL_FATAL
;
529 /* Clean up the parse routine ASTs. */
531 if (curp
->mdoc
&& ! mdoc_endparse(curp
->mdoc
)) {
532 assert(MANDOCLEVEL_FATAL
<= file_status
);
536 if (curp
->man
&& ! man_endparse(curp
->man
)) {
537 assert(MANDOCLEVEL_FATAL
<= file_status
);
542 roff_endparse(curp
->roff
);
545 * With -Wstop and warnings or errors of at least
546 * the requested level, do not produce output.
549 if (MANDOCLEVEL_OK
!= file_status
&& curp
->wstop
)
552 /* If unset, allocate output dev now (if applicable). */
554 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
555 switch (curp
->outtype
) {
557 curp
->outdata
= xhtml_alloc(curp
->outopts
);
560 curp
->outdata
= html_alloc(curp
->outopts
);
563 curp
->outdata
= ascii_alloc(curp
->outopts
);
564 curp
->outfree
= ascii_free
;
567 curp
->outdata
= pdf_alloc(curp
->outopts
);
568 curp
->outfree
= pspdf_free
;
571 curp
->outdata
= ps_alloc(curp
->outopts
);
572 curp
->outfree
= pspdf_free
;
578 switch (curp
->outtype
) {
582 curp
->outman
= html_man
;
583 curp
->outmdoc
= html_mdoc
;
584 curp
->outfree
= html_free
;
587 curp
->outman
= tree_man
;
588 curp
->outmdoc
= tree_mdoc
;
595 curp
->outman
= terminal_man
;
596 curp
->outmdoc
= terminal_mdoc
;
603 /* Execute the out device, if it exists. */
605 if (curp
->man
&& curp
->outman
)
606 (*curp
->outman
)(curp
->outdata
, curp
->man
);
607 if (curp
->mdoc
&& curp
->outmdoc
)
608 (*curp
->outmdoc
)(curp
->outdata
, curp
->mdoc
);
612 memset(&curp
->regs
, 0, sizeof(struct regset
));
614 /* Reset the current-parse compilers. */
617 mdoc_reset(curp
->mdoc
);
619 man_reset(curp
->man
);
622 roff_reset(curp
->roff
);
624 if (exit_status
< file_status
)
625 exit_status
= file_status
;
631 pdesc(struct curparse
*curp
)
637 * Run for each opened file; may be called more than once for
638 * each full parse sequence if the opened file is nested (i.e.,
639 * from `so'). Simply sucks in the whole file and moves into
640 * the parse phase for the file.
643 if ( ! read_whole_file(curp
, &blk
, &with_mmap
)) {
644 file_status
= MANDOCLEVEL_SYSERR
;
648 /* Line number is per-file. */
652 parsebuf(curp
, blk
, 1);
656 munmap(blk
.buf
, blk
.sz
);
663 parsebuf(struct curparse
*curp
, struct buf blk
, int start
)
668 int pos
; /* byte number in the ln buffer */
669 int lnn
; /* line number in the real file */
673 * Main parse routine for an opened file. This is called for
674 * each opened file and simply loops around the full input file,
675 * possibly nesting (i.e., with `so').
678 memset(&ln
, 0, sizeof(struct buf
));
683 for (i
= 0; i
< (int)blk
.sz
; ) {
684 if (0 == pos
&& '\0' == blk
.buf
[i
])
689 curp
->reparse_count
= 0;
692 while (i
< (int)blk
.sz
&& (start
|| '\0' != blk
.buf
[i
])) {
693 if ('\n' == blk
.buf
[i
]) {
700 * Warn about bogus characters. If you're using
701 * non-ASCII encoding, you're screwing your
702 * readers. Since I'd rather this not happen,
703 * I'll be helpful and drop these characters so
704 * we don't display gibberish. Note to manual
705 * writers: use special characters.
708 c
= (unsigned char) blk
.buf
[i
];
710 if ( ! (isascii(c
) &&
711 (isgraph(c
) || isblank(c
)))) {
712 mmsg(MANDOCERR_BADCHAR
, curp
,
713 curp
->line
, pos
, "ignoring byte");
718 /* Trailing backslash = a plain char. */
720 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
721 if (pos
>= (int)ln
.sz
)
722 resize_buf(&ln
, 256);
723 ln
.buf
[pos
++] = blk
.buf
[i
++];
727 /* Found escape & at least one other char. */
729 if ('\n' == blk
.buf
[i
+ 1]) {
731 /* Escaped newlines are skipped over */
736 if ('"' == blk
.buf
[i
+ 1]) {
738 /* Comment, skip to end of line */
739 for (; i
< (int)blk
.sz
; ++i
) {
740 if ('\n' == blk
.buf
[i
]) {
747 /* Backout trailing whitespaces */
748 for (; pos
> 0; --pos
) {
749 if (ln
.buf
[pos
- 1] != ' ')
751 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
757 /* Some other escape sequence, copy & cont. */
759 if (pos
+ 1 >= (int)ln
.sz
)
760 resize_buf(&ln
, 256);
762 ln
.buf
[pos
++] = blk
.buf
[i
++];
763 ln
.buf
[pos
++] = blk
.buf
[i
++];
766 if (pos
>= (int)ln
.sz
)
767 resize_buf(&ln
, 256);
772 * A significant amount of complexity is contained by
773 * the roff preprocessor. It's line-oriented but can be
774 * expressed on one line, so we need at times to
775 * readjust our starting point and re-run it. The roff
776 * preprocessor can also readjust the buffers with new
777 * data, so we pass them in wholesale.
784 (curp
->roff
, curp
->line
,
785 &ln
.buf
, &ln
.sz
, of
, &of
);
789 if (REPARSE_LIMIT
>= ++curp
->reparse_count
)
790 parsebuf(curp
, ln
, 0);
792 mmsg(MANDOCERR_ROFFLOOP
, curp
,
793 curp
->line
, pos
, NULL
);
797 pos
= strlen(ln
.buf
);
805 assert(MANDOCLEVEL_FATAL
<= file_status
);
808 if (pfile(ln
.buf
+ of
, curp
)) {
818 * If we encounter errors in the recursive parsebuf()
819 * call, make sure we don't continue parsing.
822 if (MANDOCLEVEL_FATAL
<= file_status
)
826 * If input parsers have not been allocated, do so now.
827 * We keep these instanced between parsers, but set them
828 * locally per parse routine since we can use different
829 * parsers with each one.
832 if ( ! (curp
->man
|| curp
->mdoc
))
833 pset(ln
.buf
+ of
, pos
- of
, curp
);
836 * Lastly, push down into the parsers themselves. One
837 * of these will have already been set in the pset()
839 * If libroff returns ROFF_TBL, then add it to the
840 * currently open parse. Since we only get here if
841 * there does exist data (see tbl_data.c), we're
842 * guaranteed that something's been allocated.
845 if (ROFF_TBL
== rr
) {
846 assert(curp
->man
|| curp
->mdoc
);
848 man_addspan(curp
->man
, roff_span(curp
->roff
));
850 mdoc_addspan(curp
->mdoc
, roff_span(curp
->roff
));
852 } else if (curp
->man
|| curp
->mdoc
) {
854 man_parseln(curp
->man
,
855 curp
->line
, ln
.buf
, of
) :
856 mdoc_parseln(curp
->mdoc
,
857 curp
->line
, ln
.buf
, of
);
860 assert(MANDOCLEVEL_FATAL
<= file_status
);
865 /* Temporary buffers typically are not full. */
867 if (0 == start
&& '\0' == blk
.buf
[i
])
870 /* Start the next input line. */
879 pset(const char *buf
, int pos
, struct curparse
*curp
)
884 * Try to intuit which kind of manual parser should be used. If
885 * passed in by command-line (-man, -mdoc), then use that
886 * explicitly. If passed as -mandoc, then try to guess from the
887 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
888 * default to -man, which is more lenient.
890 * Separate out pmdoc/pman from mdoc/man: the first persists
891 * through all parsers, while the latter is used per-parse.
894 if ('.' == buf
[0] || '\'' == buf
[0]) {
895 for (i
= 1; buf
[i
]; i
++)
896 if (' ' != buf
[i
] && '\t' != buf
[i
])
902 switch (curp
->inttype
) {
904 if (NULL
== curp
->pmdoc
)
905 curp
->pmdoc
= mdoc_alloc
906 (&curp
->regs
, curp
, mmsg
);
908 curp
->mdoc
= curp
->pmdoc
;
911 if (NULL
== curp
->pman
)
912 curp
->pman
= man_alloc
913 (&curp
->regs
, curp
, mmsg
);
915 curp
->man
= curp
->pman
;
921 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
922 if (NULL
== curp
->pmdoc
)
923 curp
->pmdoc
= mdoc_alloc
924 (&curp
->regs
, curp
, mmsg
);
926 curp
->mdoc
= curp
->pmdoc
;
930 if (NULL
== curp
->pman
)
931 curp
->pman
= man_alloc(&curp
->regs
, curp
, mmsg
);
933 curp
->man
= curp
->pman
;
937 moptions(enum intt
*tflags
, char *arg
)
940 if (0 == strcmp(arg
, "doc"))
942 else if (0 == strcmp(arg
, "andoc"))
944 else if (0 == strcmp(arg
, "an"))
947 fprintf(stderr
, "%s: Bad argument\n", arg
);
955 toptions(struct curparse
*curp
, char *arg
)
958 if (0 == strcmp(arg
, "ascii"))
959 curp
->outtype
= OUTT_ASCII
;
960 else if (0 == strcmp(arg
, "lint")) {
961 curp
->outtype
= OUTT_LINT
;
962 curp
->wlevel
= MANDOCLEVEL_WARNING
;
964 else if (0 == strcmp(arg
, "tree"))
965 curp
->outtype
= OUTT_TREE
;
966 else if (0 == strcmp(arg
, "html"))
967 curp
->outtype
= OUTT_HTML
;
968 else if (0 == strcmp(arg
, "xhtml"))
969 curp
->outtype
= OUTT_XHTML
;
970 else if (0 == strcmp(arg
, "ps"))
971 curp
->outtype
= OUTT_PS
;
972 else if (0 == strcmp(arg
, "pdf"))
973 curp
->outtype
= OUTT_PDF
;
975 fprintf(stderr
, "%s: Bad argument\n", arg
);
983 woptions(struct curparse
*curp
, char *arg
)
997 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
1004 curp
->wlevel
= MANDOCLEVEL_WARNING
;
1007 curp
->wlevel
= MANDOCLEVEL_ERROR
;
1010 curp
->wlevel
= MANDOCLEVEL_FATAL
;
1013 fprintf(stderr
, "-W%s: Bad argument\n", o
);
1022 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
1024 struct curparse
*cp
;
1025 enum mandoclevel level
;
1027 level
= MANDOCLEVEL_FATAL
;
1028 while (t
< mandoclimits
[level
])
1032 cp
= (struct curparse
*)arg
;
1033 if (level
< cp
->wlevel
)
1036 fprintf(stderr
, "%s:%d:%d: %s: %s",
1037 cp
->file
, ln
, col
+ 1, mandoclevels
[level
], mandocerrs
[t
]);
1039 fprintf(stderr
, ": %s", msg
);
1040 fputc('\n', stderr
);
1042 if (file_status
< level
)
1043 file_status
= level
;
1045 return(level
< MANDOCLEVEL_FATAL
);