8354 sync regcomp(3C) with upstream (fix make catalog)
[unleashed/tickless.git] / usr / src / cmd / mandoc / read.c
blob0c5188b757e8b2e87e133d08216b6b4eed82659c
/*	$Id: read.c,v 1.150.2.5 2017/01/09 02:25:53 schwarze Exp $ */
/*
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
19 #include "config.h"
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
27 #include <assert.h>
28 #include <ctype.h>
29 #if HAVE_ERR
30 #include <err.h>
31 #endif
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <stdarg.h>
35 #include <stdint.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <zlib.h>
42 #include "mandoc_aux.h"
43 #include "mandoc.h"
44 #include "roff.h"
45 #include "mdoc.h"
46 #include "man.h"
47 #include "libmandoc.h"
48 #include "roff_int.h"
50 #define REPARSE_LIMIT 1000
52 struct mparse {
53 struct roff_man *man; /* man parser */
54 struct roff *roff; /* roff parser (!NULL) */
55 char *sodest; /* filename pointed to by .so */
56 const char *file; /* filename of current input file */
57 struct buf *primary; /* buffer currently being parsed */
58 struct buf *secondary; /* preprocessed copy of input */
59 const char *defos; /* default operating system */
60 mandocmsg mmsg; /* warning/error message handler */
61 enum mandoclevel file_status; /* status of current parse */
62 enum mandoclevel wlevel; /* ignore messages below this */
63 int options; /* parser options */
64 int gzip; /* current input file is gzipped */
65 int filenc; /* encoding of the current file */
66 int reparse_count; /* finite interp. stack */
67 int line; /* line number in the file */
/* Forward declarations of the file-local helpers defined below. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
79 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
80 MANDOCERR_OK,
81 MANDOCERR_WARNING,
82 MANDOCERR_WARNING,
83 MANDOCERR_ERROR,
84 MANDOCERR_UNSUPP,
85 MANDOCERR_MAX,
86 MANDOCERR_MAX
89 static const char * const mandocerrs[MANDOCERR_MAX] = {
90 "ok",
92 "generic warning",
94 /* related to the prologue */
95 "missing manual title, using UNTITLED",
96 "missing manual title, using \"\"",
97 "lower case character in document title",
98 "missing manual section, using \"\"",
99 "unknown manual section",
100 "missing date, using today's date",
101 "cannot parse date, using it verbatim",
102 "missing Os macro, using \"\"",
103 "duplicate prologue macro",
104 "late prologue macro",
105 "skipping late title macro",
106 "prologue macros out of order",
108 /* related to document structure */
109 ".so is fragile, better use ln(1)",
110 "no document body",
111 "content before first section header",
112 "first section is not \"NAME\"",
113 "NAME section without Nm before Nd",
114 "NAME section without description",
115 "description not at the end of NAME",
116 "bad NAME section content",
117 "missing comma before name",
118 "missing description line, using \"\"",
119 "sections out of conventional order",
120 "duplicate section title",
121 "unexpected section",
122 "unusual Xr order",
123 "unusual Xr punctuation",
124 "AUTHORS section without An macro",
126 /* related to macros and nesting */
127 "obsolete macro",
128 "macro neither callable nor escaped",
129 "skipping paragraph macro",
130 "moving paragraph macro out of list",
131 "skipping no-space macro",
132 "blocks badly nested",
133 "nested displays are not portable",
134 "moving content out of list",
135 "fill mode already enabled, skipping",
136 "fill mode already disabled, skipping",
137 "line scope broken",
139 /* related to missing macro arguments */
140 "skipping empty request",
141 "conditional request controls empty scope",
142 "skipping empty macro",
143 "empty block",
144 "empty argument, using 0n",
145 "missing display type, using -ragged",
146 "list type is not the first argument",
147 "missing -width in -tag list, using 6n",
148 "missing utility name, using \"\"",
149 "missing function name, using \"\"",
150 "empty head in list item",
151 "empty list item",
152 "missing font type, using \\fR",
153 "unknown font type, using \\fR",
154 "nothing follows prefix",
155 "empty reference block",
156 "missing section argument",
157 "missing -std argument, adding it",
158 "missing option string, using \"\"",
159 "missing resource identifier, using \"\"",
160 "missing eqn box, using \"\"",
162 /* related to bad macro arguments */
163 "unterminated quoted argument",
164 "duplicate argument",
165 "skipping duplicate argument",
166 "skipping duplicate display type",
167 "skipping duplicate list type",
168 "skipping -width argument",
169 "wrong number of cells",
170 "unknown AT&T UNIX version",
171 "comma in function argument",
172 "parenthesis in function name",
173 "invalid content in Rs block",
174 "invalid Boolean argument",
175 "unknown font, skipping request",
176 "odd number of characters in request",
178 /* related to plain text */
179 "blank line in fill mode, using .sp",
180 "tab in filled text",
181 "whitespace at end of input line",
182 "bad comment style",
183 "invalid escape sequence",
184 "undefined string, using \"\"",
186 /* related to tables */
187 "tbl line starts with span",
188 "tbl column starts with span",
189 "skipping vertical bar in tbl layout",
191 "generic error",
193 /* related to tables */
194 "non-alphabetic character in tbl options",
195 "skipping unknown tbl option",
196 "missing tbl option argument",
197 "wrong tbl option argument size",
198 "empty tbl layout",
199 "invalid character in tbl layout",
200 "unmatched parenthesis in tbl layout",
201 "tbl without any data cells",
202 "ignoring data in spanned tbl cell",
203 "ignoring extra tbl data cells",
204 "data block open at end of tbl",
206 /* related to document structure and macros */
207 NULL,
208 "input stack limit exceeded, infinite loop?",
209 "skipping bad character",
210 "skipping unknown macro",
211 "skipping insecure request",
212 "skipping item outside list",
213 "skipping column outside column list",
214 "skipping end of block that is not open",
215 "fewer RS blocks open, skipping",
216 "inserting missing end of block",
217 "appending missing end of block",
219 /* related to request and macro arguments */
220 "escaped character not allowed in a name",
221 "NOT IMPLEMENTED: Bd -file",
222 "skipping display without arguments",
223 "missing list type, using -item",
224 "missing manual name, using \"\"",
225 "uname(3) system call failed, using UNKNOWN",
226 "unknown standard specifier",
227 "skipping request without numeric argument",
228 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
229 ".so request failed",
230 "skipping all arguments",
231 "skipping excess arguments",
232 "divide by zero",
234 "unsupported feature",
235 "input too large",
236 "unsupported control character",
237 "unsupported roff request",
238 "eqn delim option in tbl",
239 "unsupported tbl layout modifier",
240 "ignoring macro in table",
243 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
244 "SUCCESS",
245 "RESERVED",
246 "WARNING",
247 "ERROR",
248 "UNSUPP",
249 "BADARG",
250 "SYSERR"
254 static void
255 resize_buf(struct buf *buf, size_t initial)
258 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
259 buf->buf = mandoc_realloc(buf->buf, buf->sz);
262 static void
263 choose_parser(struct mparse *curp)
265 char *cp, *ep;
266 int format;
269 * If neither command line arguments -mdoc or -man select
270 * a parser nor the roff parser found a .Dd or .TH macro
271 * yet, look ahead in the main input buffer.
274 if ((format = roff_getformat(curp->roff)) == 0) {
275 cp = curp->primary->buf;
276 ep = cp + curp->primary->sz;
277 while (cp < ep) {
278 if (*cp == '.' || *cp == '\'') {
279 cp++;
280 if (cp[0] == 'D' && cp[1] == 'd') {
281 format = MPARSE_MDOC;
282 break;
284 if (cp[0] == 'T' && cp[1] == 'H') {
285 format = MPARSE_MAN;
286 break;
289 cp = memchr(cp, '\n', ep - cp);
290 if (cp == NULL)
291 break;
292 cp++;
296 if (format == MPARSE_MDOC) {
297 mdoc_hash_init();
298 curp->man->macroset = MACROSET_MDOC;
299 curp->man->first->tok = TOKEN_NONE;
300 } else {
301 man_hash_init();
302 curp->man->macroset = MACROSET_MAN;
303 curp->man->first->tok = TOKEN_NONE;
308 * Main parse routine for a buffer.
309 * It assumes encoding and line numbering are already set up.
310 * It can recurse directly (for invocations of user-defined
311 * macros, inline equations, and input line traps)
312 * and indirectly (for .so file inclusion).
314 static void
315 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
317 const struct tbl_span *span;
318 struct buf ln;
319 const char *save_file;
320 char *cp;
321 size_t pos; /* byte number in the ln buffer */
322 size_t j; /* auxiliary byte number in the blk buffer */
323 enum rofferr rr;
324 int of;
325 int lnn; /* line number in the real file */
326 int fd;
327 unsigned char c;
329 memset(&ln, 0, sizeof(ln));
331 lnn = curp->line;
332 pos = 0;
334 while (i < blk.sz) {
335 if (0 == pos && '\0' == blk.buf[i])
336 break;
338 if (start) {
339 curp->line = lnn;
340 curp->reparse_count = 0;
342 if (lnn < 3 &&
343 curp->filenc & MPARSE_UTF8 &&
344 curp->filenc & MPARSE_LATIN1)
345 curp->filenc = preconv_cue(&blk, i);
348 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
351 * When finding an unescaped newline character,
352 * leave the character loop to process the line.
353 * Skip a preceding carriage return, if any.
356 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
357 '\n' == blk.buf[i + 1])
358 ++i;
359 if ('\n' == blk.buf[i]) {
360 ++i;
361 ++lnn;
362 break;
366 * Make sure we have space for the worst
367 * case of 11 bytes: "\\[u10ffff]\0"
370 if (pos + 11 > ln.sz)
371 resize_buf(&ln, 256);
374 * Encode 8-bit input.
377 c = blk.buf[i];
378 if (c & 0x80) {
379 if ( ! (curp->filenc && preconv_encode(
380 &blk, &i, &ln, &pos, &curp->filenc))) {
381 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
382 curp->line, pos, "0x%x", c);
383 ln.buf[pos++] = '?';
384 i++;
386 continue;
390 * Exclude control characters.
393 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
394 mandoc_vmsg(c == 0x00 || c == 0x04 ||
395 c > 0x0a ? MANDOCERR_CHAR_BAD :
396 MANDOCERR_CHAR_UNSUPP,
397 curp, curp->line, pos, "0x%x", c);
398 i++;
399 if (c != '\r')
400 ln.buf[pos++] = '?';
401 continue;
404 /* Trailing backslash = a plain char. */
406 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
407 ln.buf[pos++] = blk.buf[i++];
408 continue;
412 * Found escape and at least one other character.
413 * When it's a newline character, skip it.
414 * When there is a carriage return in between,
415 * skip that one as well.
418 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
419 '\n' == blk.buf[i + 2])
420 ++i;
421 if ('\n' == blk.buf[i + 1]) {
422 i += 2;
423 ++lnn;
424 continue;
427 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
428 j = i;
429 i += 2;
430 /* Comment, skip to end of line */
431 for (; i < blk.sz; ++i) {
432 if (blk.buf[i] != '\n')
433 continue;
434 if (blk.buf[i - 1] == ' ' ||
435 blk.buf[i - 1] == '\t')
436 mandoc_msg(
437 MANDOCERR_SPACE_EOL,
438 curp, curp->line,
439 pos + i-1 - j, NULL);
440 ++i;
441 ++lnn;
442 break;
445 /* Backout trailing whitespaces */
446 for (; pos > 0; --pos) {
447 if (ln.buf[pos - 1] != ' ')
448 break;
449 if (pos > 2 && ln.buf[pos - 2] == '\\')
450 break;
452 break;
455 /* Catch escaped bogus characters. */
457 c = (unsigned char) blk.buf[i+1];
459 if ( ! (isascii(c) &&
460 (isgraph(c) || isblank(c)))) {
461 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
462 curp->line, pos, "0x%x", c);
463 i += 2;
464 ln.buf[pos++] = '?';
465 continue;
468 /* Some other escape sequence, copy & cont. */
470 ln.buf[pos++] = blk.buf[i++];
471 ln.buf[pos++] = blk.buf[i++];
474 if (pos >= ln.sz)
475 resize_buf(&ln, 256);
477 ln.buf[pos] = '\0';
480 * A significant amount of complexity is contained by
481 * the roff preprocessor. It's line-oriented but can be
482 * expressed on one line, so we need at times to
483 * readjust our starting point and re-run it. The roff
484 * preprocessor can also readjust the buffers with new
485 * data, so we pass them in wholesale.
488 of = 0;
491 * Maintain a lookaside buffer of all parsed lines. We
492 * only do this if mparse_keep() has been invoked (the
493 * buffer may be accessed with mparse_getkeep()).
496 if (curp->secondary) {
497 curp->secondary->buf = mandoc_realloc(
498 curp->secondary->buf,
499 curp->secondary->sz + pos + 2);
500 memcpy(curp->secondary->buf +
501 curp->secondary->sz,
502 ln.buf, pos);
503 curp->secondary->sz += pos;
504 curp->secondary->buf
505 [curp->secondary->sz] = '\n';
506 curp->secondary->sz++;
507 curp->secondary->buf
508 [curp->secondary->sz] = '\0';
510 rerun:
511 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
513 switch (rr) {
514 case ROFF_REPARSE:
515 if (REPARSE_LIMIT >= ++curp->reparse_count)
516 mparse_buf_r(curp, ln, of, 0);
517 else
518 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
519 curp->line, pos, NULL);
520 pos = 0;
521 continue;
522 case ROFF_APPEND:
523 pos = strlen(ln.buf);
524 continue;
525 case ROFF_RERUN:
526 goto rerun;
527 case ROFF_IGN:
528 pos = 0;
529 continue;
530 case ROFF_SO:
531 if ( ! (curp->options & MPARSE_SO) &&
532 (i >= blk.sz || blk.buf[i] == '\0')) {
533 curp->sodest = mandoc_strdup(ln.buf + of);
534 free(ln.buf);
535 return;
538 * We remove `so' clauses from our lookaside
539 * buffer because we're going to descend into
540 * the file recursively.
542 if (curp->secondary)
543 curp->secondary->sz -= pos + 1;
544 save_file = curp->file;
545 if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
546 mparse_readfd(curp, fd, ln.buf + of);
547 close(fd);
548 curp->file = save_file;
549 } else {
550 curp->file = save_file;
551 mandoc_vmsg(MANDOCERR_SO_FAIL,
552 curp, curp->line, pos,
553 ".so %s", ln.buf + of);
554 ln.sz = mandoc_asprintf(&cp,
555 ".sp\nSee the file %s.\n.sp",
556 ln.buf + of);
557 free(ln.buf);
558 ln.buf = cp;
559 of = 0;
560 mparse_buf_r(curp, ln, of, 0);
562 pos = 0;
563 continue;
564 default:
565 break;
568 if (curp->man->macroset == MACROSET_NONE)
569 choose_parser(curp);
572 * Lastly, push down into the parsers themselves.
573 * If libroff returns ROFF_TBL, then add it to the
574 * currently open parse. Since we only get here if
575 * there does exist data (see tbl_data.c), we're
576 * guaranteed that something's been allocated.
577 * Do the same for ROFF_EQN.
580 if (rr == ROFF_TBL)
581 while ((span = roff_span(curp->roff)) != NULL)
582 roff_addtbl(curp->man, span);
583 else if (rr == ROFF_EQN)
584 roff_addeqn(curp->man, roff_eqn(curp->roff));
585 else if ((curp->man->macroset == MACROSET_MDOC ?
586 mdoc_parseln(curp->man, curp->line, ln.buf, of) :
587 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
588 break;
590 /* Temporary buffers typically are not full. */
592 if (0 == start && '\0' == blk.buf[i])
593 break;
595 /* Start the next input line. */
597 pos = 0;
600 free(ln.buf);
603 static int
604 read_whole_file(struct mparse *curp, const char *file, int fd,
605 struct buf *fb, int *with_mmap)
607 gzFile gz;
608 size_t off;
609 ssize_t ssz;
611 #if HAVE_MMAP
612 struct stat st;
614 if (fstat(fd, &st) == -1)
615 err((int)MANDOCLEVEL_SYSERR, "%s", file);
618 * If we're a regular file, try just reading in the whole entry
619 * via mmap(). This is faster than reading it into blocks, and
620 * since each file is only a few bytes to begin with, I'm not
621 * concerned that this is going to tank any machines.
624 if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
625 if (st.st_size > 0x7fffffff) {
626 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
627 return 0;
629 *with_mmap = 1;
630 fb->sz = (size_t)st.st_size;
631 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
632 if (fb->buf != MAP_FAILED)
633 return 1;
635 #endif
637 if (curp->gzip) {
638 if ((gz = gzdopen(fd, "rb")) == NULL)
639 err((int)MANDOCLEVEL_SYSERR, "%s", file);
640 } else
641 gz = NULL;
644 * If this isn't a regular file (like, say, stdin), then we must
645 * go the old way and just read things in bit by bit.
648 *with_mmap = 0;
649 off = 0;
650 fb->sz = 0;
651 fb->buf = NULL;
652 for (;;) {
653 if (off == fb->sz) {
654 if (fb->sz == (1U << 31)) {
655 mandoc_msg(MANDOCERR_TOOLARGE, curp,
656 0, 0, NULL);
657 break;
659 resize_buf(fb, 65536);
661 ssz = curp->gzip ?
662 gzread(gz, fb->buf + (int)off, fb->sz - off) :
663 read(fd, fb->buf + (int)off, fb->sz - off);
664 if (ssz == 0) {
665 fb->sz = off;
666 return 1;
668 if (ssz == -1)
669 err((int)MANDOCLEVEL_SYSERR, "%s", file);
670 off += (size_t)ssz;
673 free(fb->buf);
674 fb->buf = NULL;
675 return 0;
678 static void
679 mparse_end(struct mparse *curp)
681 if (curp->man->macroset == MACROSET_NONE)
682 curp->man->macroset = MACROSET_MAN;
683 if (curp->man->macroset == MACROSET_MDOC)
684 mdoc_endparse(curp->man);
685 else
686 man_endparse(curp->man);
687 roff_endparse(curp->roff);
690 static void
691 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
693 struct buf *svprimary;
694 const char *svfile;
695 size_t offset;
696 static int recursion_depth;
698 if (64 < recursion_depth) {
699 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
700 return;
703 /* Line number is per-file. */
704 svfile = curp->file;
705 curp->file = file;
706 svprimary = curp->primary;
707 curp->primary = &blk;
708 curp->line = 1;
709 recursion_depth++;
711 /* Skip an UTF-8 byte order mark. */
712 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
713 (unsigned char)blk.buf[0] == 0xef &&
714 (unsigned char)blk.buf[1] == 0xbb &&
715 (unsigned char)blk.buf[2] == 0xbf) {
716 offset = 3;
717 curp->filenc &= ~MPARSE_LATIN1;
718 } else
719 offset = 0;
721 mparse_buf_r(curp, blk, offset, 1);
723 if (--recursion_depth == 0)
724 mparse_end(curp);
726 curp->primary = svprimary;
727 curp->file = svfile;
730 enum mandoclevel
731 mparse_readmem(struct mparse *curp, void *buf, size_t len,
732 const char *file)
734 struct buf blk;
736 blk.buf = buf;
737 blk.sz = len;
739 mparse_parse_buffer(curp, blk, file);
740 return curp->file_status;
744 * Read the whole file into memory and call the parsers.
745 * Called recursively when an .so request is encountered.
747 enum mandoclevel
748 mparse_readfd(struct mparse *curp, int fd, const char *file)
750 struct buf blk;
751 int with_mmap;
752 int save_filenc;
754 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
755 save_filenc = curp->filenc;
756 curp->filenc = curp->options &
757 (MPARSE_UTF8 | MPARSE_LATIN1);
758 mparse_parse_buffer(curp, blk, file);
759 curp->filenc = save_filenc;
760 #if HAVE_MMAP
761 if (with_mmap)
762 munmap(blk.buf, blk.sz);
763 else
764 #endif
765 free(blk.buf);
767 return curp->file_status;
771 mparse_open(struct mparse *curp, const char *file)
773 char *cp;
774 int fd;
776 curp->file = file;
777 cp = strrchr(file, '.');
778 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
780 /* First try to use the filename as it is. */
782 if ((fd = open(file, O_RDONLY)) != -1)
783 return fd;
786 * If that doesn't work and the filename doesn't
787 * already end in .gz, try appending .gz.
790 if ( ! curp->gzip) {
791 mandoc_asprintf(&cp, "%s.gz", file);
792 fd = open(cp, O_RDONLY);
793 free(cp);
794 if (fd != -1) {
795 curp->gzip = 1;
796 return fd;
800 /* Neither worked, give up. */
802 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
803 return -1;
806 struct mparse *
807 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
808 const char *defos)
810 struct mparse *curp;
812 curp = mandoc_calloc(1, sizeof(struct mparse));
814 curp->options = options;
815 curp->wlevel = wlevel;
816 curp->mmsg = mmsg;
817 curp->defos = defos;
819 curp->roff = roff_alloc(curp, options);
820 curp->man = roff_man_alloc( curp->roff, curp, curp->defos,
821 curp->options & MPARSE_QUICK ? 1 : 0);
822 if (curp->options & MPARSE_MDOC) {
823 mdoc_hash_init();
824 curp->man->macroset = MACROSET_MDOC;
825 } else if (curp->options & MPARSE_MAN) {
826 man_hash_init();
827 curp->man->macroset = MACROSET_MAN;
829 curp->man->first->tok = TOKEN_NONE;
830 return curp;
833 void
834 mparse_reset(struct mparse *curp)
836 roff_reset(curp->roff);
837 roff_man_reset(curp->man);
838 if (curp->secondary)
839 curp->secondary->sz = 0;
841 curp->file_status = MANDOCLEVEL_OK;
843 free(curp->sodest);
844 curp->sodest = NULL;
847 void
848 mparse_free(struct mparse *curp)
851 roff_man_free(curp->man);
852 if (curp->roff)
853 roff_free(curp->roff);
854 if (curp->secondary)
855 free(curp->secondary->buf);
857 free(curp->secondary);
858 free(curp->sodest);
859 free(curp);
862 void
863 mparse_result(struct mparse *curp, struct roff_man **man,
864 char **sodest)
867 if (sodest && NULL != (*sodest = curp->sodest)) {
868 *man = NULL;
869 return;
871 if (man)
872 *man = curp->man;
875 void
876 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
878 if (curp->file_status > *rc)
879 *rc = curp->file_status;
882 void
883 mandoc_vmsg(enum mandocerr t, struct mparse *m,
884 int ln, int pos, const char *fmt, ...)
886 char buf[256];
887 va_list ap;
889 va_start(ap, fmt);
890 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
891 va_end(ap);
893 mandoc_msg(t, m, ln, pos, buf);
896 void
897 mandoc_msg(enum mandocerr er, struct mparse *m,
898 int ln, int col, const char *msg)
900 enum mandoclevel level;
902 level = MANDOCLEVEL_UNSUPP;
903 while (er < mandoclimits[level])
904 level--;
906 if (level < m->wlevel && er != MANDOCERR_FILE)
907 return;
909 if (m->mmsg)
910 (*m->mmsg)(er, level, m->file, ln, col, msg);
912 if (m->file_status < level)
913 m->file_status = level;
916 const char *
917 mparse_strerror(enum mandocerr er)
920 return mandocerrs[er];
923 const char *
924 mparse_strlevel(enum mandoclevel lvl)
926 return mandoclevels[lvl];
929 void
930 mparse_keep(struct mparse *p)
933 assert(NULL == p->secondary);
934 p->secondary = mandoc_calloc(1, sizeof(struct buf));
937 const char *
938 mparse_getkeep(const struct mparse *p)
941 assert(p->secondary);
942 return p->secondary->sz ? p->secondary->buf : NULL;