external/bsd/mdocml/dist/roff.c

   1 /*      $Vendor-Id: roff.c,v 1.172 2011/10/24 21:41:45 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #ifdef HAVE_CONFIG_H
  19 #include "config.h"
  20 #endif
  21
  22 #include <assert.h>
  23 #include <ctype.h>
  24 #include <stdio.h>
  25 #include <stdlib.h>
  26 #include <string.h>
  27 #include <stdint.h>
  28
  29 #include "mandoc.h"
  30 #include "libroff.h"
  31 #include "libmandoc.h"
  32
  33 /*
      * Maximum number of nested if-else conditionals.
      * This is the capacity of the rstack array in struct roff.
      */
  34 #define RSTACK_MAX      128
  35
  36 /*
      * Maximum number of string expansions per line, to break infinite
      * loops; roff_res() counts its substitutions against this limit.
      */
  37 #define EXPAND_LIMIT    1000
  38
     /*
      * Tokens for the roff requests handled in this file.  The values
      * index the roffs[] dispatch table, so the order here and the order
      * of the rows in roffs[] must stay the same.
      */
  39 enum    rofft {
  40         ROFF_ad,
  41         ROFF_am,
  42         ROFF_ami,
  43         ROFF_am1,
  44         ROFF_de,
  45         ROFF_dei,
  46         ROFF_de1,
  47         ROFF_ds,
  48         ROFF_el,
  49         ROFF_hy,
  50         ROFF_ie,
  51         ROFF_if,
  52         ROFF_ig,
  53         ROFF_it,
  54         ROFF_ne,
  55         ROFF_nh,
  56         ROFF_nr,
  57         ROFF_ns,
  58         ROFF_ps,
  59         ROFF_rm,
  60         ROFF_so,
  61         ROFF_ta,
  62         ROFF_tr,
  63         ROFF_TS,
  64         ROFF_TE,
  65         ROFF_T_,
  66         ROFF_EQ,
  67         ROFF_EN,
  68         ROFF_cblock, /* block close; its roffs[] name is "." */
  69         ROFF_ccond, /* conditional close; its roffs[] name is "\\}" */
  70         ROFF_USERDEF, /* user-defined macro; roffs[] row has no name */
  71         ROFF_MAX /* number of tokens; also the "not found" result */
  72 };
  73
     /*
      * Evaluation rule of a conditional scope: produced by roff_evalcond(),
      * stored per node in roffnode.rule and stacked in roff.rstack.
      * A new top-level node starts out as ROFFRULE_DENY (roffnode_push()).
      */
  74 enum    roffrule {
  75         ROFFRULE_DENY,
  76         ROFFRULE_ALLOW
  77 };
  78
  79 /*
  80  * A single register entity.  If "set" is zero, the value of the
  81  * register should be the default one, which is per-register.
  82  * Registers are assumed to be unsigned ints for now.
      * roff_reset() zeroes the whole register array, which marks every
      * register as unset again.
  83  */
  84 struct  reg {
  85         int              set; /* non-zero once a value has been assigned */
  86         unsigned int     u; /* the value; meaningful only if "set" */
  87 };
  88
  89 /*
  90  * An incredibly-simple string buffer: a heap string together with
      * its cached length.
  91  */
  92 struct  roffstr {
  93         char            *p; /* nil-terminated buffer */
  94         size_t           sz; /* saved strlen(p) */
  95 };
  96
  97 /*
  98  * A key-value roffstr pair as part of a singly-linked list.
      * Used for the string/macro table (roff.strtab) and for the
      * multi-byte translation table (roff.xmbtab).
  99  */
 100 struct  roffkv {
 101         struct roffstr   key; /* name that is looked up */
 102         struct roffstr   val; /* text associated with the name */
 103         struct roffkv   *next; /* next in list */
 104 };
 105
     /*
      * State of one roff parser: created by roff_alloc(), recycled for
      * another input with roff_reset() and destroyed by roff_free().
      */
 106 struct  roff {
 107         struct mparse   *parse; /* parse point */
 108         struct roffnode *last; /* leaf of stack */
 109         enum roffrule    rstack[RSTACK_MAX]; /* stack of !`ie' rules */
 110         int              rstackpos; /* position in rstack; roff_alloc() starts at -1 */
 111         struct reg       regs[REG__MAX]; /* registers; zeroed by roff_reset() */
 112         struct roffkv   *strtab; /* user-defined strings & macros */
 113         struct roffkv   *xmbtab; /* multi-byte trans table (`tr') */
 114         struct roffstr  *xtab; /* single-byte trans table (`tr'); 128 entries are freed in roff_free1() */
 115         const char      *current_string; /* value of last called user macro; set by roff_parse() */
 116         struct tbl_node *first_tbl; /* first table parsed */
 117         struct tbl_node *last_tbl; /* last table parsed */
 118         struct tbl_node *tbl; /* current table being parsed */
 119         struct eqn_node *last_eqn; /* last equation parsed */
 120         struct eqn_node *first_eqn; /* first equation parsed */
 121         struct eqn_node *eqn; /* current equation being parsed */
 122         struct roff_nr  *nr[64];        /* numbered register set */
 123 };
 124
     /*
      * One open scope (block or conditional).  Nodes form a stack linked
      * through "parent"; roff.last points at the innermost one.  They are
      * created by roffnode_push() and released by roffnode_pop(), which
      * also frees "name" and "end".
      */
 125 struct  roffnode {
 126         enum rofft       tok; /* type of node */
 127         struct roffnode *parent; /* up one in stack */
 128         int              line; /* parse line */
 129         int              col; /* parse col */
 130         char            *name; /* node name, e.g. macro name */
 131         char            *end; /* end-rules: custom token */
 132         int              endspan; /* end-rules: next-line or infty; roffnode_cleanscope() decrements it and pops the node while the result is >= 0 */
 133         enum roffrule    rule; /* current evaluation rule */
 134 };
 135
     /*
      * Parameter list shared by every request handler, so that all of
      * them fit the roffproc function pointer type below.
      */
 136 #define ROFF_ARGS        struct roff *r, /* parse ctx */ \
 137                          enum rofft tok, /* tok of macro */ \
 138                          char **bufp, /* input buffer */ \
 139                          size_t *szp, /* size of input buffer */ \
 140                          int ln, /* parse line */ \
 141                          int ppos, /* original pos in buffer */ \
 142                          int pos, /* current pos in buffer */ \
 143                          int *offs /* reset offset of buffer data */
 144
     /* Handler type stored in the proc, text and sub slots of struct roffmac. */
 145 typedef enum rofferr (*roffproc)(ROFF_ARGS);
 146
     /*
      * One row of the roffs[] dispatch table.  roff_parseln() calls
      * "proc" when no scope is open, "sub" for a control line while this
      * request's scope is open, and "text" for a text line while this
      * request's scope is open.  "next" chains rows that share a hash
      * bucket (see roffhash_init()).
      */
 147 struct  roffmac {
 148         const char      *name; /* macro name */
 149         roffproc         proc; /* process new macro */
 150         roffproc         text; /* process as child text of macro */
 151         roffproc         sub; /* process as child of macro */
 152         int              flags;
 153 #define ROFFMAC_STRUCT  (1 << 0) /* always interpret */
 154         struct roffmac  *next; /* next row in the same hash bucket */
 155 };
 156
     /*
      * A predefined string: roff_alloc() and roff_reset() register every
      * entry of predefs[] with roff_setstr().
      */
 157 struct  predef {
 158         const char      *name; /* predefined input name */
 159         const char      *str; /* replacement symbol */
 160 };
 161
     /* Initialiser for one struct predef row; the rows come from "predefs.in". */
 162 #define PREDEF(__name, __str) \
 163         { (__name), (__str) },
 164
     /*
      * Forward declarations of the file-local functions.  The handlers
      * taking ROFF_ARGS are referenced from the roffs[] table below.
      */
 165 static  enum rofft       roffhash_find(const char *, size_t);
 166 static  void             roffhash_init(void);
 167 static  void             roffnode_cleanscope(struct roff *);
 168 static  void             roffnode_pop(struct roff *);
 169 static  void             roffnode_push(struct roff *, enum rofft,
 170                                 const char *, int, int);
 171 static  enum rofferr     roff_block(ROFF_ARGS);
 172 static  enum rofferr     roff_block_text(ROFF_ARGS);
 173 static  enum rofferr     roff_block_sub(ROFF_ARGS);
 174 static  enum rofferr     roff_cblock(ROFF_ARGS);
 175 static  enum rofferr     roff_ccond(ROFF_ARGS);
 176 static  enum rofferr     roff_cond(ROFF_ARGS);
 177 static  enum rofferr     roff_cond_text(ROFF_ARGS);
 178 static  enum rofferr     roff_cond_sub(ROFF_ARGS);
 179 static  enum rofferr     roff_ds(ROFF_ARGS);
 180 static  enum roffrule    roff_evalcond(const char *, int *);
 181 static  void             roff_free1(struct roff *);
 182 static  void             roff_freestr(struct roffkv *);
 183 static  char            *roff_getname(struct roff *, char **, int, int);
 184 static  const char      *roff_getstrn(const struct roff *,
 185                                 const char *, size_t);
 186 static  enum rofferr     roff_line_ignore(ROFF_ARGS);
 187 static  enum rofferr     roff_nr(ROFF_ARGS);
 188 static  void             roff_openeqn(struct roff *, const char *,
 189                                 int, int, const char *);
 190 static  enum rofft       roff_parse(struct roff *, const char *, int *);
 191 static  enum rofferr     roff_parsetext(char *);
 192 static  enum rofferr     roff_res(struct roff *,
 193                                 char **, size_t *, int, int);
 194 static  enum rofferr     roff_rm(ROFF_ARGS);
 195 static  void             roff_setstr(struct roff *,
 196                                 const char *, const char *, int);
 197 static  void             roff_setstrn(struct roffkv **, const char *,
 198                                 size_t, const char *, size_t, int);
 199 static  enum rofferr     roff_so(ROFF_ARGS);
 200 static  enum rofferr     roff_tr(ROFF_ARGS);
 201 static  enum rofferr     roff_TE(ROFF_ARGS);
 202 static  enum rofferr     roff_TS(ROFF_ARGS);
 203 static  enum rofferr     roff_EQ(ROFF_ARGS);
 204 static  enum rofferr     roff_EN(ROFF_ARGS);
 205 static  enum rofferr     roff_T_(ROFF_ARGS);
 206 static  enum rofferr     roff_userdef(ROFF_ARGS);
 207
 208 /*
      * See roffhash_find().  Request names are hashed on their first
      * character only, which must lie in ASCII_LO..ASCII_HI; there is
      * one bucket per such character.
      */
 209
 210 #define ASCII_HI         126
 211 #define ASCII_LO         33
 212 #define HASHWIDTH       (ASCII_HI - ASCII_LO + 1)
 213
     /* Bucket heads; each chain is linked through roffmac.next. */
 214 static  struct roffmac  *hash[HASHWIDTH];
 215
     /*
      * Dispatch table, indexed by enum rofft; the rows must stay in the
      * order of that enum.  Columns: name, proc, text, sub, flags, next.
      * Only scope-opening requests provide text and sub handlers.  The
      * last row (ROFF_USERDEF) has no name and is never hashed.
      */
 216 static  struct roffmac   roffs[ROFF_MAX] = {
 217         { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
 218         { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 219         { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 220         { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 221         { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 222         { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 223         { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 224         { "ds", roff_ds, NULL, NULL, 0, NULL },
 225         { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
 226         { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
 227         { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
 228         { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
 229         { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 230         { "it", roff_line_ignore, NULL, NULL, 0, NULL },
 231         { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
 232         { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
 233         { "nr", roff_nr, NULL, NULL, 0, NULL },
 234         { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
 235         { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
 236         { "rm", roff_rm, NULL, NULL, 0, NULL },
 237         { "so", roff_so, NULL, NULL, 0, NULL },
 238         { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
 239         { "tr", roff_tr, NULL, NULL, 0, NULL },
 240         { "TS", roff_TS, NULL, NULL, 0, NULL },
 241         { "TE", roff_TE, NULL, NULL, 0, NULL },
 242         { "T&", roff_T_, NULL, NULL, 0, NULL },
 243         { "EQ", roff_EQ, NULL, NULL, 0, NULL },
 244         { "EN", roff_EN, NULL, NULL, 0, NULL },
 245         { ".", roff_cblock, NULL, NULL, 0, NULL }, /* ROFF_cblock */
 246         { "\\}", roff_ccond, NULL, NULL, 0, NULL }, /* ROFF_ccond */
 247         { NULL, roff_userdef, NULL, NULL, 0, NULL }, /* ROFF_USERDEF */
 248 };
 249
 250 /*
      * Array of injected predefined strings.  The rows are generated by
      * the PREDEF() entries of "predefs.in".
      * NOTE(review): PREDEFS_MAX is hard-coded and presumably has to equal
      * the number of rows in predefs.in -- confirm when that file changes.
      */
 251 #define PREDEFS_MAX      38
 252 static  const struct predef predefs[PREDEFS_MAX] = {
 253 #include "predefs.in"
 254 };
 255
 256 /* See roffhash_find() */
 257 #define ROFF_HASH(p)    (p[0] - ASCII_LO)
 258
 259 static void
 260 roffhash_init(void)
 261 {
 262         struct roffmac   *n;
 263         int               buc, i;
 264
 265         for (i = 0; i < (int)ROFF_USERDEF; i++) {
 266                 assert(roffs[i].name[0] >= ASCII_LO);
 267                 assert(roffs[i].name[0] <= ASCII_HI);
 268
 269                 buc = ROFF_HASH(roffs[i].name);
 270
 271                 if (NULL != (n = hash[buc])) {
 272                         for ( ; n->next; n = n->next)
 273                                 /* Do nothing. */ ;
 274                         n->next = &roffs[i];
 275                 } else
 276                         hash[buc] = &roffs[i];
 277         }
 278 }
 279
 280 /*
 281  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
 282  * the nil-terminated string name could be found.
 283  */
 284 static enum rofft
 285 roffhash_find(const char *p, size_t s)
 286 {
 287         int              buc;
 288         struct roffmac  *n;
 289
 290         /*
 291          * libroff has an extremely simple hashtable, for the time
 292          * being, which simply keys on the first character, which must
 293          * be printable, then walks a chain.  It works well enough until
 294          * optimised.
 295          */
 296
 297         if (p[0] < ASCII_LO || p[0] > ASCII_HI)
 298                 return(ROFF_MAX);
 299
 300         buc = ROFF_HASH(p);
 301
 302         if (NULL == (n = hash[buc]))
 303                 return(ROFF_MAX);
 304         for ( ; n; n = n->next)
 305                 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
 306                         return((enum rofft)(n - roffs));
 307
 308         return(ROFF_MAX);
 309 }
 310
 311
 312 /*
 313  * Pop the current node off of the stack of roff instructions currently
 314  * pending.
 315  */
 316 static void
 317 roffnode_pop(struct roff *r)
 318 {
 319         struct roffnode *p;
 320
 321         assert(r->last);
 322         p = r->last;
 323
 324         r->last = r->last->parent;
 325         free(p->name);
 326         free(p->end);
 327         free(p);
 328 }
 329
 330
 331 /*
 332  * Push a roff node onto the instruction stack.  This must later be
 333  * removed with roffnode_pop().
 334  */
 335 static void
 336 roffnode_push(struct roff *r, enum rofft tok, const char *name,
 337                 int line, int col)
 338 {
 339         struct roffnode *p;
 340
 341         p = mandoc_calloc(1, sizeof(struct roffnode));
 342         p->tok = tok;
 343         if (name)
 344                 p->name = mandoc_strdup(name);
 345         p->parent = r->last;
 346         p->line = line;
 347         p->col = col;
 348         p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
 349
 350         r->last = p;
 351 }
 352
 353
 354 static void
 355 roff_free1(struct roff *r)
 356 {
 357         struct tbl_node *t;
 358         struct eqn_node *e;
 359         int              i;
 360
 361         while (NULL != (t = r->first_tbl)) {
 362                 r->first_tbl = t->next;
 363                 tbl_free(t);
 364         }
 365
 366         r->first_tbl = r->last_tbl = r->tbl = NULL;
 367
 368         while (NULL != (e = r->first_eqn)) {
 369                 r->first_eqn = e->next;
 370                 eqn_free(e);
 371         }
 372
 373         r->first_eqn = r->last_eqn = r->eqn = NULL;
 374
 375         while (r->last)
 376                 roffnode_pop(r);
 377
 378         roff_freestr(r->strtab);
 379         roff_freestr(r->xmbtab);
 380
 381         r->strtab = r->xmbtab = NULL;
 382
 383         if (r->xtab)
 384                 for (i = 0; i < 128; i++)
 385                         free(r->xtab[i].p);
 386
 387         free(r->xtab);
 388         r->xtab = NULL;
 389 }
 390
 391 void
 392 roff_reset(struct roff *r)
 393 {
 394         int              i;
 395
 396         roff_free1(r);
 397
 398         memset(&r->regs, 0, sizeof(r->regs));
 399         memset(&r->nr, 0, sizeof(r->nr));
 400
 401         for (i = 0; i < PREDEFS_MAX; i++)
 402                 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
 403 }
 404
 405
 406 void
 407 roff_free(struct roff *r)
 408 {
 409
 410         roff_free1(r);
 411         free(r);
 412 }
 413
 414
 415 struct roff *
 416 roff_alloc(struct mparse *parse)
 417 {
 418         struct roff     *r;
 419         int              i;
 420
 421         r = mandoc_calloc(1, sizeof(struct roff));
 422         r->parse = parse;
 423         r->rstackpos = -1;
 424
 425         roffhash_init();
 426
 427         for (i = 0; i < PREDEFS_MAX; i++)
 428                 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
 429
 430         return(r);
 431 }
 432
 433 /*
 434  * Pre-filter each and every line for reserved words (one beginning with
 435  * `\*', e.g., `\*(ab').  These must be handled before the actual line
 436  * is processed.
 437  * This also checks the syntax of regular escapes.
 438  */
 439 static enum rofferr
 440 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
 441 {
 442         enum mandoc_esc  esc;
 443         const char      *stesc; /* start of an escape sequence ('\\') */
 444         const char      *stnam; /* start of the name, after "[(*" */
 445         const char      *cp;    /* end of the name, e.g. before ']' */
 446         const char      *res;   /* the string to be substituted */
 447         int              i, maxl, expand_count;
 448         size_t           nsz;
 449         char            *n;
 450
 451         expand_count = 0;
 452
 453 again:
 454         cp = *bufp + pos;
 455         while (NULL != (cp = strchr(cp, '\\'))) {
 456                 stesc = cp++;
 457
 458                 /*
 459                  * The second character must be an asterisk.
 460                  * If it isn't, skip it anyway:  It is escaped,
 461                  * so it can't start another escape sequence.
 462                  */
 463
 464                 if ('\0' == *cp)
 465                         return(ROFF_CONT);
 466
 467                 if ('*' != *cp) {
 468                         res = cp;
 469                         esc = mandoc_escape(&cp, NULL, NULL);
 470                         if (ESCAPE_ERROR != esc)
 471                                 continue;
 472                         cp = res;
 473                         mandoc_msg
 474                                 (MANDOCERR_BADESCAPE, r->parse,
 475                                  ln, (int)(stesc - *bufp), NULL);
 476                         return(ROFF_CONT);
 477                 }
 478
 479                 cp++;
 480
 481                 /*
 482                  * The third character decides the length
 483                  * of the name of the string.
 484                  * Save a pointer to the name.
 485                  */
 486
 487                 switch (*cp) {
 488                 case ('\0'):
 489                         return(ROFF_CONT);
 490                 case ('('):
 491                         cp++;
 492                         maxl = 2;
 493                         break;
 494                 case ('['):
 495                         cp++;
 496                         maxl = 0;
 497                         break;
 498                 default:
 499                         maxl = 1;
 500                         break;
 501                 }
 502                 stnam = cp;
 503
 504                 /* Advance to the end of the name. */
 505
 506                 for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
 507                         if ('\0' == *cp) {
 508                                 mandoc_msg
 509                                         (MANDOCERR_BADESCAPE,
 510                                          r->parse, ln,
 511                                          (int)(stesc - *bufp), NULL);
 512                                 return(ROFF_CONT);
 513                         }
 514                         if (0 == maxl && ']' == *cp)
 515                                 break;
 516                 }
 517
 518                 /*
 519                  * Retrieve the replacement string; if it is
 520                  * undefined, resume searching for escapes.
 521                  */
 522
 523                 res = roff_getstrn(r, stnam, (size_t)i);
 524
 525                 if (NULL == res) {
 526                         mandoc_msg
 527                                 (MANDOCERR_BADESCAPE, r->parse,
 528                                  ln, (int)(stesc - *bufp), NULL);
 529                         res = "";
 530                 }
 531
 532                 /* Replace the escape sequence by the string. */
 533
 534                 pos = stesc - *bufp;
 535
 536                 nsz = *szp + strlen(res) + 1;
 537                 n = mandoc_malloc(nsz);
 538
 539                 strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
 540                 strlcat(n, res, nsz);
 541                 strlcat(n, cp + (maxl ? 0 : 1), nsz);
 542
 543                 free(*bufp);
 544
 545                 *bufp = n;
 546                 *szp = nsz;
 547
 548                 if (EXPAND_LIMIT >= ++expand_count)
 549                         goto again;
 550
 551                 /* Just leave the string unexpanded. */
 552                 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
 553                 return(ROFF_IGN);
 554         }
 555         return(ROFF_CONT);
 556 }
 557
 558 /*
 559  * Process text streams: convert all breakable hyphens into ASCII_HYPH.
 560  */
 561 static enum rofferr
 562 roff_parsetext(char *p)
 563 {
 564         size_t           sz;
 565         const char      *start;
 566         enum mandoc_esc  esc;
 567
 568         start = p;
 569
 570         while ('\0' != *p) {
 571                 sz = strcspn(p, "-\\");
 572                 p += sz;
 573
 574                 if ('\0' == *p)
 575                         break;
 576
 577                 if ('\\' == *p) {
 578                         /* Skip over escapes. */
 579                         p++;
 580                         esc = mandoc_escape
 581                                 ((const char **)/*XXX*/(void *)&p, NULL, NULL);
 582                         if (ESCAPE_ERROR == esc)
 583                                 break;
 584                         continue;
 585                 } else if (p == start) {
 586                         p++;
 587                         continue;
 588                 }
 589
 590                 if (isalpha((unsigned char)p[-1]) &&
 591                     isalpha((unsigned char)p[1]))
 592                         *p = ASCII_HYPH;
 593                 p++;
 594         }
 595
 596         return(ROFF_CONT);
 597 }
 598
 599 enum rofferr
 600 roff_parseln(struct roff *r, int ln, char **bufp,
 601                 size_t *szp, int pos, int *offs)
 602 {
 603         enum rofft       t;
 604         enum rofferr     e;
 605         int              ppos, ctl;
 606
 607         /*
 608          * Run the reserved-word filter only if we have some reserved
 609          * words to fill in.
 610          */
 611
 612         e = roff_res(r, bufp, szp, ln, pos);
 613         if (ROFF_IGN == e)
 614                 return(e);
 615         assert(ROFF_CONT == e);
 616
 617         ppos = pos;
 618         ctl = mandoc_getcontrol(*bufp, &pos);
 619
 620         /*
 621          * First, if a scope is open and we're not a macro, pass the
 622          * text through the macro's filter.  If a scope isn't open and
 623          * we're not a macro, just let it through.
 624          * Finally, if there's an equation scope open, divert it into it
 625          * no matter our state.
 626          */
 627
 628         if (!ctl) {
 629                 if (r->last) {
 630                         t = r->last->tok;
 631                         assert(roffs[t].text);
 632                         e = (*roffs[t].text)
 633                             (r, t, bufp, szp, ln, pos, pos, offs);
 634                         assert(ROFF_IGN == e || ROFF_CONT == e);
 635                         if (ROFF_CONT != e)
 636                                 return(e);
 637                 }
 638                 if (r->eqn)
 639                         return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
 640                 if (r->tbl)
 641                         return(tbl_read(r->tbl, ln, *bufp, pos));
 642                 return(roff_parsetext(*bufp + pos));
 643         } else if (r->eqn)
 644                 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
 645
 646         /*
 647          * If a scope is open, go to the child handler for that macro,
 648          * as it may want to preprocess before doing anything with it.
 649          * Don't do so if an equation is open.
 650          */
 651
 652         if (r->last) {
 653                 t = r->last->tok;
 654                 assert(roffs[t].sub);
 655                 return((*roffs[t].sub)
 656                                 (r, t, bufp, szp,
 657                                  ln, ppos, pos, offs));
 658         }
 659
 660         /*
 661          * Lastly, as we've no scope open, try to look up and execute
 662          * the new macro.  If no macro is found, simply return and let
 663          * the compilers handle it.
 664          */
 665
 666         if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
 667                 return(ROFF_CONT);
 668
 669         assert(roffs[t].proc);
 670         return((*roffs[t].proc)
 671                         (r, t, bufp, szp,
 672                          ln, ppos, pos, offs));
 673 }
 674
 675
 676 void
 677 roff_endparse(struct roff *r)
 678 {
 679
 680         if (r->last)
 681                 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
 682                                 r->last->line, r->last->col, NULL);
 683
 684         if (r->eqn) {
 685                 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
 686                                 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
 687                 eqn_end(&r->eqn);
 688         }
 689
 690         if (r->tbl) {
 691                 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
 692                                 r->tbl->line, r->tbl->pos, NULL);
 693                 tbl_end(&r->tbl);
 694         }
 695 }
 696
 697 /*
 698  * Parse a roff node's type from the input buffer.  This must be in the
 699  * form of ".foo xxx" in the usual way.
 700  */
 701 static enum rofft
 702 roff_parse(struct roff *r, const char *buf, int *pos)
 703 {
 704         const char      *mac;
 705         size_t           maclen;
 706         enum rofft       t;
 707
 708         if ('\0' == buf[*pos] || '"' == buf[*pos] ||
 709                         '\t' == buf[*pos] || ' ' == buf[*pos])
 710                 return(ROFF_MAX);
 711
 712         /*
 713          * We stop the macro parse at an escape, tab, space, or nil.
 714          * However, `\}' is also a valid macro, so make sure we don't
 715          * clobber it by seeing the `\' as the end of token.
 716          */
 717
 718         mac = buf + *pos;
 719         maclen = strcspn(mac + 1, " \\\t\0") + 1;
 720
 721         t = (r->current_string = roff_getstrn(r, mac, maclen))
 722             ? ROFF_USERDEF : roffhash_find(mac, maclen);
 723
 724         *pos += (int)maclen;
 725
 726         while (buf[*pos] && ' ' == buf[*pos])
 727                 (*pos)++;
 728
 729         return(t);
 730 }
 731
 732 /* ARGSUSED */
 733 static enum rofferr
 734 roff_cblock(ROFF_ARGS)
 735 {
 736
 737         /*
 738          * A block-close `..' should only be invoked as a child of an
 739          * ignore macro, otherwise raise a warning and just ignore it.
 740          */
 741
 742         if (NULL == r->last) {
 743                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 744                 return(ROFF_IGN);
 745         }
 746
 747         switch (r->last->tok) {
 748         case (ROFF_am):
 749                 /* FALLTHROUGH */
 750         case (ROFF_ami):
 751                 /* FALLTHROUGH */
 752         case (ROFF_am1):
 753                 /* FALLTHROUGH */
 754         case (ROFF_de):
 755                 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
 756                 /* FALLTHROUGH */
 757         case (ROFF_dei):
 758                 /* FALLTHROUGH */
 759         case (ROFF_ig):
 760                 break;
 761         default:
 762                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 763                 return(ROFF_IGN);
 764         }
 765
 766         if ((*bufp)[pos])
 767                 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
 768
 769         roffnode_pop(r);
 770         roffnode_cleanscope(r);
 771         return(ROFF_IGN);
 772
 773 }
 774
 775
 776 static void
 777 roffnode_cleanscope(struct roff *r)
 778 {
 779
 780         while (r->last) {
 781                 if (--r->last->endspan < 0)
 782                         break;
 783                 roffnode_pop(r);
 784         }
 785 }
 786
 787
 788 /* ARGSUSED */
 789 static enum rofferr
 790 roff_ccond(ROFF_ARGS)
 791 {
 792
 793         if (NULL == r->last) {
 794                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 795                 return(ROFF_IGN);
 796         }
 797
 798         switch (r->last->tok) {
 799         case (ROFF_el):
 800                 /* FALLTHROUGH */
 801         case (ROFF_ie):
 802                 /* FALLTHROUGH */
 803         case (ROFF_if):
 804                 break;
 805         default:
 806                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 807                 return(ROFF_IGN);
 808         }
 809
 810         if (r->last->endspan > -1) {
 811                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 812                 return(ROFF_IGN);
 813         }
 814
 815         if ((*bufp)[pos])
 816                 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
 817
 818         roffnode_pop(r);
 819         roffnode_cleanscope(r);
 820         return(ROFF_IGN);
 821 }
 822
 823
 824 /* ARGSUSED */
 825 static enum rofferr
 826 roff_block(ROFF_ARGS)
 827 {
 828         int             sv;
 829         size_t          sz;
 830         char            *name;
 831
 832         name = NULL;
 833
 834         if (ROFF_ig != tok) {
 835                 if ('\0' == (*bufp)[pos]) {
 836                         mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
 837                         return(ROFF_IGN);
 838                 }
 839
 840                 /*
 841                  * Re-write `de1', since we don't really care about
 842                  * groff's strange compatibility mode, into `de'.
 843                  */
 844
 845                 if (ROFF_de1 == tok)
 846                         tok = ROFF_de;
 847                 if (ROFF_de == tok)
 848                         name = *bufp + pos;
 849                 else
 850                         mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
 851                             roffs[tok].name);
 852
 853                 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
 854                         pos++;
 855
 856                 while (isspace((unsigned char)(*bufp)[pos]))
 857                         (*bufp)[pos++] = '\0';
 858         }
 859
 860         roffnode_push(r, tok, name, ln, ppos);
 861
 862         /*
 863          * At the beginning of a `de' macro, clear the existing string
 864          * with the same name, if there is one.  New content will be
 865          * added from roff_block_text() in multiline mode.
 866          */
 867
 868         if (ROFF_de == tok)
 869                 roff_setstr(r, name, "", 0);
 870
 871         if ('\0' == (*bufp)[pos])
 872                 return(ROFF_IGN);
 873
 874         /* If present, process the custom end-of-line marker. */
 875
 876         sv = pos;
 877         while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
 878                 pos++;
 879
 880         /*
 881          * Note: groff does NOT like escape characters in the input.
 882          * Instead of detecting this, we're just going to let it fly and
 883          * to hell with it.
 884          */
 885
 886         assert(pos > sv);
 887         sz = (size_t)(pos - sv);
 888
 889         if (1 == sz && '.' == (*bufp)[sv])
 890                 return(ROFF_IGN);
 891
 892         r->last->end = mandoc_malloc(sz + 1);
 893
 894         memcpy(r->last->end, *bufp + sv, sz);
 895         r->last->end[(int)sz] = '\0';
 896
 897         if ((*bufp)[pos])
 898                 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
 899
 900         return(ROFF_IGN);
 901 }
 902
 903
 904 /* ARGSUSED */
 905 static enum rofferr
 906 roff_block_sub(ROFF_ARGS)
 907 {
 908         enum rofft      t;
 909         int             i, j;
 910
 911         /*
 912          * First check whether a custom macro exists at this level.  If
 913          * it does, then check against it.  This is some of groff's
 914          * stranger behaviours.  If we encountered a custom end-scope
 915          * tag and that tag also happens to be a "real" macro, then we
 916          * need to try interpreting it again as a real macro.  If it's
 917          * not, then return ignore.  Else continue.
 918          */
 919
 920         if (r->last->end) {
 921                 for (i = pos, j = 0; r->last->end[j]; j++, i++)
 922                         if ((*bufp)[i] != r->last->end[j])
 923                                 break;
 924
 925                 if ('\0' == r->last->end[j] &&
 926                                 ('\0' == (*bufp)[i] ||
 927                                  ' ' == (*bufp)[i] ||
 928                                  '\t' == (*bufp)[i])) {
 929                         roffnode_pop(r);
 930                         roffnode_cleanscope(r);
 931
 932                         while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
 933                                 i++;
 934
 935                         pos = i;
 936                         if (ROFF_MAX != roff_parse(r, *bufp, &pos))
 937                                 return(ROFF_RERUN);
 938                         return(ROFF_IGN);
 939                 }
 940         }
 941
 942         /*
 943          * If we have no custom end-query or lookup failed, then try
 944          * pulling it out of the hashtable.
 945          */
 946
 947         t = roff_parse(r, *bufp, &pos);
 948
 949         /*
 950          * Macros other than block-end are only significant
 951          * in `de' blocks; elsewhere, simply throw them away.
 952          */
 953         if (ROFF_cblock != t) {
 954                 if (ROFF_de == tok)
 955                         roff_setstr(r, r->last->name, *bufp + ppos, 1);
 956                 return(ROFF_IGN);
 957         }
 958
 959         assert(roffs[t].proc);
 960         return((*roffs[t].proc)(r, t, bufp, szp,
 961                                 ln, ppos, pos, offs));
 962 }
 963
 964
 965 /* ARGSUSED */
 966 static enum rofferr
 967 roff_block_text(ROFF_ARGS)
 968 {
 969
 970         if (ROFF_de == tok)
 971                 roff_setstr(r, r->last->name, *bufp + pos, 1);
 972
 973         return(ROFF_IGN);
 974 }
 975
 976
 977 /* ARGSUSED */
 978 static enum rofferr
 979 roff_cond_sub(ROFF_ARGS)
 980 {
 981         enum rofft       t;
 982         enum roffrule    rr;
 983         char            *ep;
 984
 985         rr = r->last->rule;
 986         roffnode_cleanscope(r);
 987
 988         /*
 989          * If the macro is unknown, first check if it contains a closing
 990          * delimiter `\}'.  If it does, close out our scope and return
 991          * the currently-scoped rule (ignore or continue).  Else, drop
 992          * into the currently-scoped rule.
 993          */
 994
 995         if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
 996                 ep = &(*bufp)[pos];
 997                 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
 998                         ep++;
 999                         if ('}' != *ep)
1000                                 continue;
1001
1002                         /*
1003                          * Make the \} go away.
1004                          * This is a little haphazard, as it's not quite
1005                          * clear how nroff does this.
1006                          * If we're at the end of line, then just chop
1007                          * off the \} and resize the buffer.
1008                          * If we aren't, then conver it to spaces.
1009                          */
1010
1011                         if ('\0' == *(ep + 1)) {
1012                                 *--ep = '\0';
1013                                 *szp -= 2;
1014                         } else
1015                                 *(ep - 1) = *ep = ' ';
1016
1017                         roff_ccond(r, ROFF_ccond, bufp, szp,
1018                                         ln, pos, pos + 2, offs);
1019                         break;
1020                 }
1021                 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1022         }
1023
1024         /*
1025          * A denied conditional must evaluate its children if and only
1026          * if they're either structurally required (such as loops and
1027          * conditionals) or a closing macro.
1028          */
1029
1030         if (ROFFRULE_DENY == rr)
1031                 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
1032                         if (ROFF_ccond != t)
1033                                 return(ROFF_IGN);
1034
1035         assert(roffs[t].proc);
1036         return((*roffs[t].proc)(r, t, bufp, szp,
1037                                 ln, ppos, pos, offs));
1038 }
1039
1040 /* ARGSUSED */
1041 static enum rofferr
1042 roff_cond_text(ROFF_ARGS)
1043 {
1044         char            *ep;
1045         enum roffrule    rr;
1046
1047         rr = r->last->rule;
1048         roffnode_cleanscope(r);
1049
1050         ep = &(*bufp)[pos];
1051         for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1052                 ep++;
1053                 if ('}' != *ep)
1054                         continue;
1055                 *ep = '&';
1056                 roff_ccond(r, ROFF_ccond, bufp, szp,
1057                                 ln, pos, pos + 2, offs);
1058         }
1059         return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1060 }
1061
/*
 * Parse an optionally negative decimal integer from v at offset *pos.
 * On success, store the value in *res, advance *pos past the last
 * digit and return 1.  If no digit is found, return 0; a leading `-'
 * that was already consumed stays consumed and *res is left at 0.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int p, n;

	if ((n = (v[*pos] == '-')) != 0)
		(*pos)++;

	p = *pos;
	/* Plain base-10 accumulation: shift the old value, add the digit. */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + (v[p] - '0');
	if (p == *pos)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1082
/*
 * Parse a comparison operator from v at offset *pos.
 * Two-character forms ending in `=' (==, !=, >=, <=) return their
 * first character; the single-character forms return '=' for `=',
 * 'g' for `>' and 'l' for `<'.  Anything else returns -1; note that
 * a lone `!' is consumed before -1 is returned.
 */
static int
roff_getop(const char *v, int *pos)
{
	int first;

	first = v[*pos];
	if ('=' != first && '!' != first && '>' != first && '<' != first)
		return -1;

	(*pos)++;
	if ('=' == v[*pos]) {
		(*pos)++;
		return first;
	}

	if ('=' == first)
		return '=';
	if ('>' == first)
		return 'g';
	if ('<' == first)
		return 'l';
	return -1;
}
1111
1112 static enum roffrule
1113 roff_evalcond(const char *v, int *pos)
1114 {
1115         int not = 0;
1116         int lh, rh, op;
1117         switch (v[*pos]) {
1118         case ('n'):
1119                 (*pos)++;
1120                 return(ROFFRULE_ALLOW);
1121         case ('e'):
1122                 /* FALLTHROUGH */
1123         case ('o'):
1124                 /* FALLTHROUGH */
1125         case ('t'):
1126                 (*pos)++;
1127                 return(ROFFRULE_DENY);
1128         case ('!'):
1129                 not++;
1130                 (*pos)++;
1131                 break;
1132         default:
1133                 break;
1134         }
1135         if (!roff_getnum(v, pos, &lh))
1136                 return ROFFRULE_DENY;
1137         if ((op = roff_getop(v, pos)) == -1)
1138                 goto out;
1139         if (!roff_getnum(v, pos, &rh))
1140                 return ROFFRULE_DENY;
1141         switch (op) {
1142         case '>':
1143                 lh = lh >= rh;
1144                 break;
1145         case '<':
1146                 lh = lh <= rh;
1147                 break;
1148         case '=':
1149                 lh = lh == rh;
1150                 break;
1151         case '!':
1152                 lh = lh != rh;
1153                 break;
1154         case 'g':
1155                 lh = lh > rh;
1156                 break;
1157         case 'l':
1158                 lh = lh < rh;
1159                 break;
1160         default:
1161                 return ROFFRULE_DENY;
1162         }
1163 out:
1164         if (not)
1165                 lh = !lh;
1166         return lh ? ROFFRULE_ALLOW : ROFFRULE_DENY;
1167 }
1168
1169 /* ARGSUSED */
1170 static enum rofferr
1171 roff_line_ignore(ROFF_ARGS)
1172 {
1173
1174         if (ROFF_it == tok)
1175                 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it");
1176
1177         return(ROFF_IGN);
1178 }
1179
1180 /* ARGSUSED */
1181 static enum rofferr
1182 roff_cond(ROFF_ARGS)
1183 {
1184         int              sv;
1185         enum roffrule    rule;
1186
1187         /*
1188          * An `.el' has no conditional body: it will consume the value
1189          * of the current rstack entry set in prior `ie' calls or
1190          * defaults to DENY.
1191          *
1192          * If we're not an `el', however, then evaluate the conditional.
1193          */
1194
1195         rule = ROFF_el == tok ?
1196                 (r->rstackpos < 0 ?
1197                  ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1198                 roff_evalcond(*bufp, &pos);
1199
1200         sv = pos;
1201         while (' ' == (*bufp)[pos])
1202                 pos++;
1203
1204         /*
1205          * Roff is weird.  If we have just white-space after the
1206          * conditional, it's considered the BODY and we exit without
1207          * really doing anything.  Warn about this.  It's probably
1208          * wrong.
1209          */
1210
1211         if ('\0' == (*bufp)[pos] && sv != pos) {
1212                 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1213                 return(ROFF_IGN);
1214         }
1215
1216         roffnode_push(r, tok, NULL, ln, ppos);
1217
1218         r->last->rule = rule;
1219
1220         /*
1221          * An if-else will put the NEGATION of the current evaluated
1222          * conditional into the stack of rules.
1223          */
1224
1225         if (ROFF_ie == tok) {
1226                 if (r->rstackpos == RSTACK_MAX - 1) {
1227                         mandoc_msg(MANDOCERR_MEM,
1228                                 r->parse, ln, ppos, NULL);
1229                         return(ROFF_ERR);
1230                 }
1231                 r->rstack[++r->rstackpos] =
1232                         ROFFRULE_DENY == r->last->rule ?
1233                         ROFFRULE_ALLOW : ROFFRULE_DENY;
1234         }
1235
1236         /* If the parent has false as its rule, then so do we. */
1237
1238         if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1239                 r->last->rule = ROFFRULE_DENY;
1240
1241         /*
1242          * Determine scope.  If we're invoked with "\{" trailing the
1243          * conditional, then we're in a multiline scope.  Else our scope
1244          * expires on the next line.
1245          */
1246
1247         r->last->endspan = 1;
1248
1249         if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1250                 r->last->endspan = -1;
1251                 pos += 2;
1252         }
1253
1254         /*
1255          * If there are no arguments on the line, the next-line scope is
1256          * assumed.
1257          */
1258
1259         if ('\0' == (*bufp)[pos])
1260                 return(ROFF_IGN);
1261
1262         /* Otherwise re-run the roff parser after recalculating. */
1263
1264         *offs = pos;
1265         return(ROFF_RERUN);
1266 }
1267
1268
1269 /* ARGSUSED */
1270 static enum rofferr
1271 roff_ds(ROFF_ARGS)
1272 {
1273         char            *name, *string;
1274
1275         /*
1276          * A symbol is named by the first word following the macro
1277          * invocation up to a space.  Its value is anything after the
1278          * name's trailing whitespace and optional double-quote.  Thus,
1279          *
1280          *  [.ds foo "bar  "     ]
1281          *
1282          * will have `bar  "     ' as its value.
1283          */
1284
1285         string = *bufp + pos;
1286         name = roff_getname(r, &string, ln, pos);
1287         if ('\0' == *name)
1288                 return(ROFF_IGN);
1289
1290         /* Read past initial double-quote. */
1291         if ('"' == *string)
1292                 string++;
1293
1294         /* The rest is the value. */
1295         roff_setstr(r, name, string, 0);
1296         return(ROFF_IGN);
1297 }
1298
1299 int
1300 roff_regisset(const struct roff *r, enum regs reg)
1301 {
1302
1303         return(r->regs[(int)reg].set);
1304 }
1305
1306 unsigned int
1307 roff_regget(const struct roff *r, enum regs reg)
1308 {
1309
1310         return(r->regs[(int)reg].u);
1311 }
1312
1313 void
1314 roff_regunset(struct roff *r, enum regs reg)
1315 {
1316
1317         r->regs[(int)reg].set = 0;
1318 }
1319
/*
 * One number register, chained into a bucket of the r->nr table.
 */
struct roff_nr {
	char		*str;	/* register name (heap copy) */
	uint32_t	 hash;	/* bucket index the entry was inserted under */
	intmax_t	 val;	/* current value */
	struct roff_nr	*next;	/* next entry in the same bucket */
};
1326
/*
 * Hash a NUL-terminated string: multiply-by-33 accumulation over the
 * bytes, followed by one final mixing step (hv + hv / 32).
 */
static uint32_t
hash_str(const char *str)
{
	const uint8_t	*cur;
	uint32_t	 acc;

	acc = 0;
	/* "perl": k=33, r=r+r/32 */
	for (cur = (const uint8_t *)str; '\0' != *cur; cur++)
		acc = acc * 33 + *cur;

	return acc + (acc >> 5);
}
1337
1338 static struct roff_nr *
1339 hash_find(struct roff *r, const char *str, uint32_t *h)
1340 {
1341         struct roff_nr *e;
1342         *h = hash_str(str) % (sizeof(r->nr) / sizeof(r->nr[0]));
1343
1344         for (e = r->nr[*h]; e; e = e->next)
1345                 if (e->hash == *h && strcmp(e->str, str) == 0)
1346                         return e;
1347         return NULL;
1348 }
1349
1350 static struct roff_nr *
1351 hash_insert(struct roff *r, const char *str, uint32_t h)
1352 {
1353         struct roff_nr *e;
1354
1355         e = mandoc_malloc(sizeof(*e));
1356         e->str = mandoc_strdup(str);
1357         e->hash = h;
1358         e->next = r->nr[h];
1359         r->nr[h] = e;
1360         return e;
1361 }
1362
1363 /* ARGSUSED */
1364 static enum rofferr
1365 roff_nr(ROFF_ARGS)
1366 {
1367         const char      *key;
1368         char            *val;
1369         uint32_t         hv;
1370         struct roff_nr  *h;
1371
1372         val = *bufp + pos;
1373         key = roff_getname(r, &val, ln, pos);
1374
1375         if ((h = hash_find(r, key, &hv)) == NULL)
1376                 h = hash_insert(r, key, hv);
1377
1378         h->val = mandoc_strntoi(val, strlen(val), 10);
1379
1380         if (0 == strcmp(key, "nS")) {
1381                 r->regs[(int)REG_nS].set = 1;
1382                 if (h->val >= 0)
1383                         r->regs[(int)REG_nS].u = (unsigned)h->val;
1384                 else
1385                         r->regs[(int)REG_nS].u = 0u;
1386         }
1387
1388         return(ROFF_IGN);
1389 }
1390
1391 void
1392 roff_expand_nr(struct roff *r, const char *src, int *sp, size_t slen,
1393     char **dst, int *dp, size_t *dlenp)
1394 {
1395         uint32_t         hv;
1396         struct roff_nr  *h;
1397         int              l, s;
1398         char             e, *key;
1399
1400         s = *sp + 2;    /* skip \\\n */
1401
1402         if ('[' == src[s]) {            /* XXX: Support builtins */
1403                 s++;
1404                 e = ']';
1405         } else
1406                 e = '\0';
1407
1408         for (l = s; src[l] && l < (int)slen; l++) {
1409                 if (e) {
1410                         if (src[l] == e)
1411                                 break;
1412                 } else {
1413                         if (!isalnum((unsigned char)src[l]))
1414                                 break;
1415                 }
1416         }
1417         *sp = l;
1418         l -= s;
1419         key = mandoc_malloc(l + 1);
1420         memcpy(key, src + s, l);
1421         key[l] = '\0';
1422
1423         if ((h = hash_find(r, key, &hv)) == NULL) {
1424                 free(key);
1425                 return;
1426         }
1427         if (*dst == NULL || *dlenp - *dp < 256)
1428                 *dst = mandoc_realloc(*dst, *dlenp += 256);
1429
1430         /* XXX: support .af */
1431         *dp += snprintf(*dst + *dp, *dlenp - *dp, "%jd", h->val);
1432 }
1433
1434 /* ARGSUSED */
1435 static enum rofferr
1436 roff_rm(ROFF_ARGS)
1437 {
1438         const char       *name;
1439         char             *cp;
1440
1441         cp = *bufp + pos;
1442         while ('\0' != *cp) {
1443                 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1444                 if ('\0' != *name)
1445                         roff_setstr(r, name, NULL, 0);
1446         }
1447         return(ROFF_IGN);
1448 }
1449
1450 /* ARGSUSED */
1451 static enum rofferr
1452 roff_TE(ROFF_ARGS)
1453 {
1454
1455         if (NULL == r->tbl)
1456                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1457         else
1458                 tbl_end(&r->tbl);
1459
1460         return(ROFF_IGN);
1461 }
1462
1463 /* ARGSUSED */
1464 static enum rofferr
1465 roff_T_(ROFF_ARGS)
1466 {
1467
1468         if (NULL == r->tbl)
1469                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1470         else
1471                 tbl_restart(ppos, ln, r->tbl);
1472
1473         return(ROFF_IGN);
1474 }
1475
#if 0
/*
 * Currently compiled out.
 * End the open equation, if any; returns 1 if eqn_end() reported
 * ROFF_EQN and 0 otherwise (including when no equation is open).
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return(0);
	return(ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0);
}
#endif
1484
1485 static void
1486 roff_openeqn(struct roff *r, const char *name, int line,
1487                 int offs, const char *buf)
1488 {
1489         struct eqn_node *e;
1490         int              poff;
1491
1492         assert(NULL == r->eqn);
1493         e = eqn_alloc(name, offs, line, r->parse);
1494
1495         if (r->last_eqn)
1496                 r->last_eqn->next = e;
1497         else
1498                 r->first_eqn = r->last_eqn = e;
1499
1500         r->eqn = r->last_eqn = e;
1501
1502         if (buf) {
1503                 poff = 0;
1504                 eqn_read(&r->eqn, line, buf, offs, &poff);
1505         }
1506 }
1507
1508 /* ARGSUSED */
1509 static enum rofferr
1510 roff_EQ(ROFF_ARGS)
1511 {
1512
1513         roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1514         return(ROFF_IGN);
1515 }
1516
1517 /* ARGSUSED */
1518 static enum rofferr
1519 roff_EN(ROFF_ARGS)
1520 {
1521
1522         mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1523         return(ROFF_IGN);
1524 }
1525
1526 /* ARGSUSED */
1527 static enum rofferr
1528 roff_TS(ROFF_ARGS)
1529 {
1530         struct tbl_node *t;
1531
1532         if (r->tbl) {
1533                 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1534                 tbl_end(&r->tbl);
1535         }
1536
1537         t = tbl_alloc(ppos, ln, r->parse);
1538
1539         if (r->last_tbl)
1540                 r->last_tbl->next = t;
1541         else
1542                 r->first_tbl = r->last_tbl = t;
1543
1544         r->tbl = r->last_tbl = t;
1545         return(ROFF_IGN);
1546 }
1547
1548 /* ARGSUSED */
1549 static enum rofferr
1550 roff_tr(ROFF_ARGS)
1551 {
1552         const char      *p, *first, *second;
1553         size_t           fsz, ssz;
1554         enum mandoc_esc  esc;
1555
1556         p = *bufp + pos;
1557
1558         if ('\0' == *p) {
1559                 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1560                 return(ROFF_IGN);
1561         }
1562
1563         while ('\0' != *p) {
1564                 fsz = ssz = 1;
1565
1566                 first = p++;
1567                 if ('\\' == *first) {
1568                         esc = mandoc_escape(&p, NULL, NULL);
1569                         if (ESCAPE_ERROR == esc) {
1570                                 mandoc_msg
1571                                         (MANDOCERR_BADESCAPE, r->parse,
1572                                          ln, (int)(p - *bufp), NULL);
1573                                 return(ROFF_IGN);
1574                         }
1575                         fsz = (size_t)(p - first);
1576                 }
1577
1578                 second = p++;
1579                 if ('\\' == *second) {
1580                         esc = mandoc_escape(&p, NULL, NULL);
1581                         if (ESCAPE_ERROR == esc) {
1582                                 mandoc_msg
1583                                         (MANDOCERR_BADESCAPE, r->parse,
1584                                          ln, (int)(p - *bufp), NULL);
1585                                 return(ROFF_IGN);
1586                         }
1587                         ssz = (size_t)(p - second);
1588                 } else if ('\0' == *second) {
1589                         mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1590                                         ln, (int)(p - *bufp), NULL);
1591                         second = " ";
1592                         p--;
1593                 }
1594
1595                 if (fsz > 1) {
1596                         roff_setstrn(&r->xmbtab, first,
1597                                         fsz, second, ssz, 0);
1598                         continue;
1599                 }
1600
1601                 if (NULL == r->xtab)
1602                         r->xtab = mandoc_calloc
1603                                 (128, sizeof(struct roffstr));
1604
1605                 free(r->xtab[(int)*first].p);
1606                 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1607                 r->xtab[(int)*first].sz = ssz;
1608         }
1609
1610         return(ROFF_IGN);
1611 }
1612
1613 /* ARGSUSED */
1614 static enum rofferr
1615 roff_so(ROFF_ARGS)
1616 {
1617         char *name;
1618
1619         mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1620
1621         /*
1622          * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
1623          * opening anything that's not in our cwd or anything beneath
1624          * it.  Thus, explicitly disallow traversing up the file-system
1625          * or using absolute paths.
1626          */
1627
1628         name = *bufp + pos;
1629         if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1630                 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1631                 return(ROFF_ERR);
1632         }
1633
1634         *offs = pos;
1635         return(ROFF_SO);
1636 }
1637
1638 /* ARGSUSED */
1639 static enum rofferr
1640 roff_userdef(ROFF_ARGS)
1641 {
1642         const char       *arg[9];
1643         char             *cp, *n1, *n2;
1644         int               i;
1645
1646         /*
1647          * Collect pointers to macro argument strings
1648          * and null-terminate them.
1649          */
1650         cp = *bufp + pos;
1651         for (i = 0; i < 9; i++)
1652                 arg[i] = '\0' == *cp ? "" :
1653                     mandoc_getarg(r->parse, &cp, ln, &pos);
1654
1655         /*
1656          * Expand macro arguments.
1657          */
1658         *szp = 0;
1659         n1 = cp = mandoc_strdup(r->current_string);
1660         while (NULL != (cp = strstr(cp, "\\$"))) {
1661                 i = cp[2] - '1';
1662                 if (0 > i || 8 < i) {
1663                         /* Not an argument invocation. */
1664                         cp += 2;
1665                         continue;
1666                 }
1667
1668                 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1669                 n2 = mandoc_malloc(*szp);
1670
1671                 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1672                 strlcat(n2, arg[i], *szp);
1673                 strlcat(n2, cp + 3, *szp);
1674
1675                 cp = n2 + (cp - n1);
1676                 free(n1);
1677                 n1 = n2;
1678         }
1679
1680         /*
1681          * Replace the macro invocation
1682          * by the expanded macro.
1683          */
1684         free(*bufp);
1685         *bufp = n1;
1686         if (0 == *szp)
1687                 *szp = strlen(*bufp) + 1;
1688
1689         return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1690            ROFF_REPARSE : ROFF_APPEND);
1691 }
1692
1693 static char *
1694 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1695 {
1696         char     *name, *cp;
1697
1698         name = *cpp;
1699         if ('\0' == *name)
1700                 return(name);
1701
1702         /* Read until end of name. */
1703         for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1704                 if ('\\' != *cp)
1705                         continue;
1706                 cp++;
1707                 if ('\\' == *cp)
1708                         continue;
1709                 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1710                 *cp = '\0';
1711                 name = cp;
1712         }
1713
1714         /* Nil-terminate name. */
1715         if ('\0' != *cp)
1716                 *(cp++) = '\0';
1717
1718         /* Read past spaces. */
1719         while (' ' == *cp)
1720                 cp++;
1721
1722         *cpp = cp;
1723         return(name);
1724 }
1725
1726 /*
1727  * Store *string into the user-defined string called *name.
1728  * In multiline mode, append to an existing entry and append '\n';
1729  * else replace the existing entry, if there is one.
1730  * To clear an existing entry, call with (*r, *name, NULL, 0).
1731  */
1732 static void
1733 roff_setstr(struct roff *r, const char *name, const char *string,
1734         int multiline)
1735 {
1736
1737         roff_setstrn(&r->strtab, name, strlen(name), string,
1738                         string ? strlen(string) : 0, multiline);
1739 }
1740
1741 static void
1742 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1743                 const char *string, size_t stringsz, int multiline)
1744 {
1745         struct roffkv   *n;
1746         char            *c;
1747         int              i;
1748         size_t           oldch, newch;
1749
1750         /* Search for an existing string with the same name. */
1751         n = *r;
1752
1753         while (n && strcmp(name, n->key.p))
1754                 n = n->next;
1755
1756         if (NULL == n) {
1757                 /* Create a new string table entry. */
1758                 n = mandoc_malloc(sizeof(struct roffkv));
1759                 n->key.p = mandoc_strndup(name, namesz);
1760                 n->key.sz = namesz;
1761                 n->val.p = NULL;
1762                 n->val.sz = 0;
1763                 n->next = *r;
1764                 *r = n;
1765         } else if (0 == multiline) {
1766                 /* In multiline mode, append; else replace. */
1767                 free(n->val.p);
1768                 n->val.p = NULL;
1769                 n->val.sz = 0;
1770         }
1771
1772         if (NULL == string)
1773                 return;
1774
1775         /*
1776          * One additional byte for the '\n' in multiline mode,
1777          * and one for the terminating '\0'.
1778          */
1779         newch = stringsz + (multiline ? 2u : 1u);
1780
1781         if (NULL == n->val.p) {
1782                 n->val.p = mandoc_malloc(newch);
1783                 *n->val.p = '\0';
1784                 oldch = 0;
1785         } else {
1786                 oldch = n->val.sz;
1787                 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1788         }
1789
1790         /* Skip existing content in the destination buffer. */
1791         c = n->val.p + (int)oldch;
1792
1793         /* Append new content to the destination buffer. */
1794         i = 0;
1795         while (i < (int)stringsz) {
1796                 /*
1797                  * Rudimentary roff copy mode:
1798                  * Handle escaped backslashes.
1799                  */
1800                 if ('\\' == string[i] && '\\' == string[i + 1])
1801                         i++;
1802                 *c++ = string[i++];
1803         }
1804
1805         /* Append terminating bytes. */
1806         if (multiline)
1807                 *c++ = '\n';
1808
1809         *c = '\0';
1810         n->val.sz = (int)(c - n->val.p);
1811 }
1812
1813 static const char *
1814 roff_getstrn(const struct roff *r, const char *name, size_t len)
1815 {
1816         const struct roffkv *n;
1817
1818         for (n = r->strtab; n; n = n->next)
1819                 if (0 == strncmp(name, n->key.p, len) &&
1820                                 '\0' == n->key.p[(int)len])
1821                         return(n->val.p);
1822
1823         return(NULL);
1824 }
1825
1826 static void
1827 roff_freestr(struct roffkv *r)
1828 {
1829         struct roffkv    *n, *nn;
1830
1831         for (n = r; n; n = nn) {
1832                 free(n->key.p);
1833                 free(n->val.p);
1834                 nn = n->next;
1835                 free(n);
1836         }
1837 }
1838
1839 const struct tbl_span *
1840 roff_span(const struct roff *r)
1841 {
1842
1843         return(r->tbl ? tbl_span(r->tbl) : NULL);
1844 }
1845
1846 const struct eqn *
1847 roff_eqn(const struct roff *r)
1848 {
1849
1850         return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1851 }
1852
1853 /*
1854  * Duplicate an input string, making the appropriate character
1855  * conversations (as stipulated by `tr') along the way.
1856  * Returns a heap-allocated string with all the replacements made.
1857  */
1858 char *
1859 roff_strdup(const struct roff *r, const char *p)
1860 {
1861         const struct roffkv *cp;
1862         char            *res;
1863         const char      *pp;
1864         size_t           ssz, sz;
1865         enum mandoc_esc  esc;
1866
1867         if (NULL == r->xmbtab && NULL == r->xtab)
1868                 return(mandoc_strdup(p));
1869         else if ('\0' == *p)
1870                 return(mandoc_strdup(""));
1871
1872         /*
1873          * Step through each character looking for term matches
1874          * (remember that a `tr' can be invoked with an escape, which is
1875          * a glyph but the escape is multi-character).
1876          * We only do this if the character hash has been initialised
1877          * and the string is >0 length.
1878          */
1879
1880         res = NULL;
1881         ssz = 0;
1882
1883         while ('\0' != *p) {
1884                 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1885                         sz = r->xtab[(int)*p].sz;
1886                         res = mandoc_realloc(res, ssz + sz + 1);
1887                         memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1888                         ssz += sz;
1889                         p++;
1890                         continue;
1891                 } else if ('\\' != *p) {
1892                         res = mandoc_realloc(res, ssz + 2);
1893                         res[ssz++] = *p++;
1894                         continue;
1895                 }
1896
1897                 /* Search for term matches. */
1898                 for (cp = r->xmbtab; cp; cp = cp->next)
1899                         if (0 == strncmp(p, cp->key.p, cp->key.sz))
1900                                 break;
1901
1902                 if (NULL != cp) {
1903                         /*
1904                          * A match has been found.
1905                          * Append the match to the array and move
1906                          * forward by its keysize.
1907                          */
1908                         res = mandoc_realloc
1909                                 (res, ssz + cp->val.sz + 1);
1910                         memcpy(res + ssz, cp->val.p, cp->val.sz);
1911                         ssz += cp->val.sz;
1912                         p += (int)cp->key.sz;
1913                         continue;
1914                 }
1915
1916                 /*
1917                  * Handle escapes carefully: we need to copy
1918                  * over just the escape itself, or else we might
1919                  * do replacements within the escape itself.
1920                  * Make sure to pass along the bogus string.
1921                  */
1922                 pp = p++;
1923                 esc = mandoc_escape(&p, NULL, NULL);
1924                 if (ESCAPE_ERROR == esc) {
1925                         sz = strlen(pp);
1926                         res = mandoc_realloc(res, ssz + sz + 1);
1927                         memcpy(res + ssz, pp, sz);
1928                         break;
1929                 }
1930                 /*
1931                  * We bail out on bad escapes.
1932                  * No need to warn: we already did so when
1933                  * roff_res() was called.
1934                  */
1935                 sz = (int)(p - pp);
1936                 res = mandoc_realloc(res, ssz + sz + 1);
1937                 memcpy(res + ssz, pp, sz);
1938                 ssz += sz;
1939         }
1940
1941         res[(int)ssz] = '\0';
1942         return(res);
1943 }