src/backend/regex/regc_nfa.c

   1 /*
   2  * NFA utilities.
   3  * This file is #included by regcomp.c.
   4  *
   5  * Copyright (c) 1998, 1999 Henry Spencer.      All rights reserved.
   6  *
   7  * Development of this software was funded, in part, by Cray Research Inc.,
   8  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
   9  * Corporation, none of whom are responsible for the results.  The author
  10  * thanks all of them.
  11  *
  12  * Redistribution and use in source and binary forms -- with or without
  13  * modification -- are permitted for any purpose, provided that
  14  * redistributions in source form retain this entire copyright notice and
  15  * indicate the origin and nature of any modifications.
  16  *
  17  * I'd appreciate being given credit for this package in the documentation
  18  * of software which uses it, but that is not a requirement.
  19  *
  20  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
  21  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  22  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
  23  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  29  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30  *
  31  * $PostgreSQL$
  32  *
  33  *
  34  * One or two things that technically ought to be in here
  35  * are actually in color.c, thanks to some incestuous relationships in
  36  * the color chains.
  37  */
  38
  39 #define NISERR()        VISERR(nfa->v)
  40 #define NERR(e)         VERR(nfa->v, (e))
  41
  42
  43 /*
  44  * newnfa - set up an NFA
  45  */
  46 static struct nfa *                             /* the NFA, or NULL */
  47 newnfa(struct vars * v,
  48            struct colormap * cm,
  49            struct nfa * parent)         /* NULL if primary NFA */
  50 {
  51         struct nfa *nfa;
  52
  53         nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
  54         if (nfa == NULL)
  55                 return NULL;
  56
  57         nfa->states = NULL;
  58         nfa->slast = NULL;
  59         nfa->free = NULL;
  60         nfa->nstates = 0;
  61         nfa->cm = cm;
  62         nfa->v = v;
  63         nfa->size = 0;
  64         nfa->bos[0] = nfa->bos[1] = COLORLESS;
  65         nfa->eos[0] = nfa->eos[1] = COLORLESS;
  66         nfa->parent = parent;           /* Precedes newfstate so parent is valid. */
  67         nfa->post = newfstate(nfa, '@');        /* number 0 */
  68         nfa->pre = newfstate(nfa, '>');         /* number 1 */
  69
  70         nfa->init = newstate(nfa);      /* may become invalid later */
  71         nfa->final = newstate(nfa);
  72         if (ISERR())
  73         {
  74                 freenfa(nfa);
  75                 return NULL;
  76         }
  77         rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
  78         newarc(nfa, '^', 1, nfa->pre, nfa->init);
  79         newarc(nfa, '^', 0, nfa->pre, nfa->init);
  80         rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
  81         newarc(nfa, '$', 1, nfa->final, nfa->post);
  82         newarc(nfa, '$', 0, nfa->final, nfa->post);
  83
  84         if (ISERR())
  85         {
  86                 freenfa(nfa);
  87                 return NULL;
  88         }
  89         return nfa;
  90 }
  91
  92 /*
  93  * TooManyStates - checks if the max states exceeds the compile-time value
  94  */
  95 static int
  96 TooManyStates(struct nfa *nfa)
  97 {
  98         struct nfa *parent = nfa->parent;
  99         size_t          sz = nfa->size;
 100
 101         while (parent != NULL)
 102         {
 103                 sz = parent->size;
 104                 parent = parent->parent;
 105         }
 106         if (sz > REG_MAX_STATES)
 107                 return 1;
 108         return 0;
 109 }
 110
 111 /*
 112  * IncrementSize - increases the tracked size of the NFA and its parents.
 113  */
 114 static void
 115 IncrementSize(struct nfa *nfa)
 116 {
 117         struct nfa *parent = nfa->parent;
 118
 119         nfa->size++;
 120         while (parent != NULL)
 121         {
 122                 parent->size++;
 123                 parent = parent->parent;
 124         }
 125 }
 126
 127 /*
 128  * DecrementSize - decreases the tracked size of the NFA and its parents.
 129  */
 130 static void
 131 DecrementSize(struct nfa *nfa)
 132 {
 133         struct nfa *parent = nfa->parent;
 134
 135         nfa->size--;
 136         while (parent != NULL)
 137         {
 138                 parent->size--;
 139                 parent = parent->parent;
 140         }
 141 }
 142
 143 /*
 144  * freenfa - free an entire NFA
 145  */
 146 static void
 147 freenfa(struct nfa * nfa)
 148 {
 149         struct state *s;
 150
 151         while ((s = nfa->states) != NULL)
 152         {
 153                 s->nins = s->nouts = 0; /* don't worry about arcs */
 154                 freestate(nfa, s);
 155         }
 156         while ((s = nfa->free) != NULL)
 157         {
 158                 nfa->free = s->next;
 159                 destroystate(nfa, s);
 160         }
 161
 162         nfa->slast = NULL;
 163         nfa->nstates = -1;
 164         nfa->pre = NULL;
 165         nfa->post = NULL;
 166         FREE(nfa);
 167 }
 168
 169 /*
 170  * newstate - allocate an NFA state, with zero flag value
 171  */
 172 static struct state *                   /* NULL on error */
 173 newstate(struct nfa * nfa)
 174 {
 175         struct state *s;
 176
 177         if (TooManyStates(nfa))
 178         {
 179                 NERR(REG_ETOOBIG);
 180                 return NULL;
 181         }
 182         if (nfa->free != NULL)
 183         {
 184                 s = nfa->free;
 185                 nfa->free = s->next;
 186         }
 187         else
 188         {
 189                 s = (struct state *) MALLOC(sizeof(struct state));
 190                 if (s == NULL)
 191                 {
 192                         NERR(REG_ESPACE);
 193                         return NULL;
 194                 }
 195                 s->oas.next = NULL;
 196                 s->free = NULL;
 197                 s->noas = 0;
 198         }
 199
 200         assert(nfa->nstates >= 0);
 201         s->no = nfa->nstates++;
 202         s->flag = 0;
 203         if (nfa->states == NULL)
 204                 nfa->states = s;
 205         s->nins = 0;
 206         s->ins = NULL;
 207         s->nouts = 0;
 208         s->outs = NULL;
 209         s->tmp = NULL;
 210         s->next = NULL;
 211         if (nfa->slast != NULL)
 212         {
 213                 assert(nfa->slast->next == NULL);
 214                 nfa->slast->next = s;
 215         }
 216         s->prev = nfa->slast;
 217         nfa->slast = s;
 218         /* track the current size and the parent size */
 219         IncrementSize(nfa);
 220         return s;
 221 }
 222
 223 /*
 224  * newfstate - allocate an NFA state with a specified flag value
 225  */
 226 static struct state *                   /* NULL on error */
 227 newfstate(struct nfa * nfa, int flag)
 228 {
 229         struct state *s;
 230
 231         s = newstate(nfa);
 232         if (s != NULL)
 233                 s->flag = (char) flag;
 234         return s;
 235 }
 236
 237 /*
 238  * dropstate - delete a state's inarcs and outarcs and free it
 239  */
 240 static void
 241 dropstate(struct nfa * nfa,
 242                   struct state * s)
 243 {
 244         struct arc *a;
 245
 246         while ((a = s->ins) != NULL)
 247                 freearc(nfa, a);
 248         while ((a = s->outs) != NULL)
 249                 freearc(nfa, a);
 250         freestate(nfa, s);
 251 }
 252
 253 /*
 254  * freestate - free a state, which has no in-arcs or out-arcs
 255  */
 256 static void
 257 freestate(struct nfa * nfa,
 258                   struct state * s)
 259 {
 260         assert(s != NULL);
 261         assert(s->nins == 0 && s->nouts == 0);
 262
 263         s->no = FREESTATE;
 264         s->flag = 0;
 265         if (s->next != NULL)
 266                 s->next->prev = s->prev;
 267         else
 268         {
 269                 assert(s == nfa->slast);
 270                 nfa->slast = s->prev;
 271         }
 272         if (s->prev != NULL)
 273                 s->prev->next = s->next;
 274         else
 275         {
 276                 assert(s == nfa->states);
 277                 nfa->states = s->next;
 278         }
 279         s->prev = NULL;
 280         s->next = nfa->free;            /* don't delete it, put it on the free list */
 281         nfa->free = s;
 282         DecrementSize(nfa);
 283 }
 284
 285 /*
 286  * destroystate - really get rid of an already-freed state
 287  */
 288 static void
 289 destroystate(struct nfa * nfa,
 290                          struct state * s)
 291 {
 292         struct arcbatch *ab;
 293         struct arcbatch *abnext;
 294
 295         assert(s->no == FREESTATE);
 296         for (ab = s->oas.next; ab != NULL; ab = abnext)
 297         {
 298                 abnext = ab->next;
 299                 FREE(ab);
 300         }
 301         s->ins = NULL;
 302         s->outs = NULL;
 303         s->next = NULL;
 304         FREE(s);
 305 }
 306
 307 /*
 308  * newarc - set up a new arc within an NFA
 309  */
 310 static void
 311 newarc(struct nfa * nfa,
 312            int t,
 313            pcolor co,
 314            struct state * from,
 315            struct state * to)
 316 {
 317         struct arc *a;
 318
 319         assert(from != NULL && to != NULL);
 320
 321         /* check for duplicates */
 322         for (a = from->outs; a != NULL; a = a->outchain)
 323                 if (a->to == to && a->co == co && a->type == t)
 324                         return;
 325
 326         a = allocarc(nfa, from);
 327         if (NISERR())
 328                 return;
 329         assert(a != NULL);
 330
 331         a->type = t;
 332         a->co = (color) co;
 333         a->to = to;
 334         a->from = from;
 335
 336         /*
 337          * Put the new arc on the beginning, not the end, of the chains. Not only
 338          * is this easier, it has the very useful side effect that deleting the
 339          * most-recently-added arc is the cheapest case rather than the most
 340          * expensive one.
 341          */
 342         a->inchain = to->ins;
 343         to->ins = a;
 344         a->outchain = from->outs;
 345         from->outs = a;
 346
 347         from->nouts++;
 348         to->nins++;
 349
 350         if (COLORED(a) && nfa->parent == NULL)
 351                 colorchain(nfa->cm, a);
 352 }
 353
 354 /*
 355  * allocarc - allocate a new out-arc within a state
 356  */
 357 static struct arc *                             /* NULL for failure */
 358 allocarc(struct nfa * nfa,
 359                  struct state * s)
 360 {
 361         struct arc *a;
 362
 363         /* shortcut */
 364         if (s->free == NULL && s->noas < ABSIZE)
 365         {
 366                 a = &s->oas.a[s->noas];
 367                 s->noas++;
 368                 return a;
 369         }
 370
 371         /* if none at hand, get more */
 372         if (s->free == NULL)
 373         {
 374                 struct arcbatch *newAb;
 375                 int                     i;
 376
 377                 newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
 378                 if (newAb == NULL)
 379                 {
 380                         NERR(REG_ESPACE);
 381                         return NULL;
 382                 }
 383                 newAb->next = s->oas.next;
 384                 s->oas.next = newAb;
 385
 386                 for (i = 0; i < ABSIZE; i++)
 387                 {
 388                         newAb->a[i].type = 0;
 389                         newAb->a[i].freechain = &newAb->a[i + 1];
 390                 }
 391                 newAb->a[ABSIZE - 1].freechain = NULL;
 392                 s->free = &newAb->a[0];
 393         }
 394         assert(s->free != NULL);
 395
 396         a = s->free;
 397         s->free = a->freechain;
 398         return a;
 399 }
 400
 401 /*
 402  * freearc - free an arc
 403  */
 404 static void
 405 freearc(struct nfa * nfa,
 406                 struct arc * victim)
 407 {
 408         struct state *from = victim->from;
 409         struct state *to = victim->to;
 410         struct arc *a;
 411
 412         assert(victim->type != 0);
 413
 414         /* take it off color chain if necessary */
 415         if (COLORED(victim) && nfa->parent == NULL)
 416                 uncolorchain(nfa->cm, victim);
 417
 418         /* take it off source's out-chain */
 419         assert(from != NULL);
 420         assert(from->outs != NULL);
 421         a = from->outs;
 422         if (a == victim)                        /* simple case:  first in chain */
 423                 from->outs = victim->outchain;
 424         else
 425         {
 426                 for (; a != NULL && a->outchain != victim; a = a->outchain)
 427                         continue;
 428                 assert(a != NULL);
 429                 a->outchain = victim->outchain;
 430         }
 431         from->nouts--;
 432
 433         /* take it off target's in-chain */
 434         assert(to != NULL);
 435         assert(to->ins != NULL);
 436         a = to->ins;
 437         if (a == victim)                        /* simple case:  first in chain */
 438                 to->ins = victim->inchain;
 439         else
 440         {
 441                 for (; a != NULL && a->inchain != victim; a = a->inchain)
 442                         continue;
 443                 assert(a != NULL);
 444                 a->inchain = victim->inchain;
 445         }
 446         to->nins--;
 447
 448         /* clean up and place on free list */
 449         victim->type = 0;
 450         victim->from = NULL;            /* precautions... */
 451         victim->to = NULL;
 452         victim->inchain = NULL;
 453         victim->outchain = NULL;
 454         victim->freechain = from->free;
 455         from->free = victim;
 456 }
 457
 458 /*
 459  * findarc - find arc, if any, from given source with given type and color
 460  * If there is more than one such arc, the result is random.
 461  */
 462 static struct arc *
 463 findarc(struct state * s,
 464                 int type,
 465                 pcolor co)
 466 {
 467         struct arc *a;
 468
 469         for (a = s->outs; a != NULL; a = a->outchain)
 470                 if (a->type == type && a->co == co)
 471                         return a;
 472         return NULL;
 473 }
 474
 475 /*
 476  * cparc - allocate a new arc within an NFA, copying details from old one
 477  */
 478 static void
 479 cparc(struct nfa * nfa,
 480           struct arc * oa,
 481           struct state * from,
 482           struct state * to)
 483 {
 484         newarc(nfa, oa->type, oa->co, from, to);
 485 }
 486
 487 /*
 488  * moveins - move all in arcs of a state to another state
 489  *
 490  * You might think this could be done better by just updating the
 491  * existing arcs, and you would be right if it weren't for the desire
 492  * for duplicate suppression, which makes it easier to just make new
 493  * ones to exploit the suppression built into newarc.
 494  */
 495 static void
 496 moveins(struct nfa * nfa,
 497                 struct state * oldState,
 498                 struct state * newState)
 499 {
 500         struct arc *a;
 501
 502         assert(oldState != newState);
 503
 504         while ((a = oldState->ins) != NULL)
 505         {
 506                 cparc(nfa, a, a->from, newState);
 507                 freearc(nfa, a);
 508         }
 509         assert(oldState->nins == 0);
 510         assert(oldState->ins == NULL);
 511 }
 512
 513 /*
 514  * copyins - copy all in arcs of a state to another state
 515  */
 516 static void
 517 copyins(struct nfa * nfa,
 518                 struct state * oldState,
 519                 struct state * newState)
 520 {
 521         struct arc *a;
 522
 523         assert(oldState != newState);
 524
 525         for (a = oldState->ins; a != NULL; a = a->inchain)
 526                 cparc(nfa, a, a->from, newState);
 527 }
 528
 529 /*
 530  * moveouts - move all out arcs of a state to another state
 531  */
 532 static void
 533 moveouts(struct nfa * nfa,
 534                  struct state * oldState,
 535                  struct state * newState)
 536 {
 537         struct arc *a;
 538
 539         assert(oldState != newState);
 540
 541         while ((a = oldState->outs) != NULL)
 542         {
 543                 cparc(nfa, a, newState, a->to);
 544                 freearc(nfa, a);
 545         }
 546 }
 547
 548 /*
 549  * copyouts - copy all out arcs of a state to another state
 550  */
 551 static void
 552 copyouts(struct nfa * nfa,
 553                  struct state * oldState,
 554                  struct state * newState)
 555 {
 556         struct arc *a;
 557
 558         assert(oldState != newState);
 559
 560         for (a = oldState->outs; a != NULL; a = a->outchain)
 561                 cparc(nfa, a, newState, a->to);
 562 }
 563
 564 /*
 565  * cloneouts - copy out arcs of a state to another state pair, modifying type
 566  */
 567 static void
 568 cloneouts(struct nfa * nfa,
 569                   struct state * old,
 570                   struct state * from,
 571                   struct state * to,
 572                   int type)
 573 {
 574         struct arc *a;
 575
 576         assert(old != from);
 577
 578         for (a = old->outs; a != NULL; a = a->outchain)
 579                 newarc(nfa, type, a->co, from, to);
 580 }
 581
 582 /*
 583  * delsub - delete a sub-NFA, updating subre pointers if necessary
 584  *
 585  * This uses a recursive traversal of the sub-NFA, marking already-seen
 586  * states using their tmp pointer.
 587  */
 588 static void
 589 delsub(struct nfa * nfa,
 590            struct state * lp,           /* the sub-NFA goes from here... */
 591            struct state * rp)           /* ...to here, *not* inclusive */
 592 {
 593         assert(lp != rp);
 594
 595         rp->tmp = rp;                           /* mark end */
 596
 597         deltraverse(nfa, lp, lp);
 598         assert(lp->nouts == 0 && rp->nins == 0);        /* did the job */
 599         assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
 600
 601         rp->tmp = NULL;                         /* unmark end */
 602         lp->tmp = NULL;                         /* and begin, marked by deltraverse */
 603 }
 604
 605 /*
 606  * deltraverse - the recursive heart of delsub
 607  * This routine's basic job is to destroy all out-arcs of the state.
 608  */
 609 static void
 610 deltraverse(struct nfa * nfa,
 611                         struct state * leftend,
 612                         struct state * s)
 613 {
 614         struct arc *a;
 615         struct state *to;
 616
 617         if (s->nouts == 0)
 618                 return;                                 /* nothing to do */
 619         if (s->tmp != NULL)
 620                 return;                                 /* already in progress */
 621
 622         s->tmp = s;                                     /* mark as in progress */
 623
 624         while ((a = s->outs) != NULL)
 625         {
 626                 to = a->to;
 627                 deltraverse(nfa, leftend, to);
 628                 assert(to->nouts == 0 || to->tmp != NULL);
 629                 freearc(nfa, a);
 630                 if (to->nins == 0 && to->tmp == NULL)
 631                 {
 632                         assert(to->nouts == 0);
 633                         freestate(nfa, to);
 634                 }
 635         }
 636
 637         assert(s->no != FREESTATE); /* we're still here */
 638         assert(s == leftend || s->nins != 0);           /* and still reachable */
 639         assert(s->nouts == 0);          /* but have no outarcs */
 640
 641         s->tmp = NULL;                          /* we're done here */
 642 }
 643
 644 /*
 645  * dupnfa - duplicate sub-NFA
 646  *
 647  * Another recursive traversal, this time using tmp to point to duplicates
 648  * as well as mark already-seen states.  (You knew there was a reason why
 649  * it's a state pointer, didn't you? :-))
 650  */
 651 static void
 652 dupnfa(struct nfa * nfa,
 653            struct state * start,        /* duplicate of subNFA starting here */
 654            struct state * stop,         /* and stopping here */
 655            struct state * from,         /* stringing duplicate from here */
 656            struct state * to)           /* to here */
 657 {
 658         if (start == stop)
 659         {
 660                 newarc(nfa, EMPTY, 0, from, to);
 661                 return;
 662         }
 663
 664         stop->tmp = to;
 665         duptraverse(nfa, start, from);
 666         /* done, except for clearing out the tmp pointers */
 667
 668         stop->tmp = NULL;
 669         cleartraverse(nfa, start);
 670 }
 671
 672 /*
 673  * duptraverse - recursive heart of dupnfa
 674  */
 675 static void
 676 duptraverse(struct nfa * nfa,
 677                         struct state * s,
 678                         struct state * stmp)    /* s's duplicate, or NULL */
 679 {
 680         struct arc *a;
 681
 682         if (s->tmp != NULL)
 683                 return;                                 /* already done */
 684
 685         s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
 686         if (s->tmp == NULL)
 687         {
 688                 assert(NISERR());
 689                 return;
 690         }
 691
 692         for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
 693         {
 694                 duptraverse(nfa, a->to, (struct state *) NULL);
 695                 if (NISERR())
 696                         break;
 697                 assert(a->to->tmp != NULL);
 698                 cparc(nfa, a, s->tmp, a->to->tmp);
 699         }
 700 }
 701
 702 /*
 703  * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
 704  */
 705 static void
 706 cleartraverse(struct nfa * nfa,
 707                           struct state * s)
 708 {
 709         struct arc *a;
 710
 711         if (s->tmp == NULL)
 712                 return;
 713         s->tmp = NULL;
 714
 715         for (a = s->outs; a != NULL; a = a->outchain)
 716                 cleartraverse(nfa, a->to);
 717 }
 718
 719 /*
 720  * specialcolors - fill in special colors for an NFA
 721  */
 722 static void
 723 specialcolors(struct nfa * nfa)
 724 {
 725         /* false colors for BOS, BOL, EOS, EOL */
 726         if (nfa->parent == NULL)
 727         {
 728                 nfa->bos[0] = pseudocolor(nfa->cm);
 729                 nfa->bos[1] = pseudocolor(nfa->cm);
 730                 nfa->eos[0] = pseudocolor(nfa->cm);
 731                 nfa->eos[1] = pseudocolor(nfa->cm);
 732         }
 733         else
 734         {
 735                 assert(nfa->parent->bos[0] != COLORLESS);
 736                 nfa->bos[0] = nfa->parent->bos[0];
 737                 assert(nfa->parent->bos[1] != COLORLESS);
 738                 nfa->bos[1] = nfa->parent->bos[1];
 739                 assert(nfa->parent->eos[0] != COLORLESS);
 740                 nfa->eos[0] = nfa->parent->eos[0];
 741                 assert(nfa->parent->eos[1] != COLORLESS);
 742                 nfa->eos[1] = nfa->parent->eos[1];
 743         }
 744 }
 745
 746 /*
 747  * optimize - optimize an NFA
 748  */
 749 static long                                             /* re_info bits */
 750 optimize(struct nfa * nfa,
 751                  FILE *f)                               /* for debug output; NULL none */
 752 {
 753 #ifdef REG_DEBUG
 754         int                     verbose = (f != NULL) ? 1 : 0;
 755
 756         if (verbose)
 757                 fprintf(f, "\ninitial cleanup:\n");
 758 #endif
 759         cleanup(nfa);                           /* may simplify situation */
 760 #ifdef REG_DEBUG
 761         if (verbose)
 762                 dumpnfa(nfa, f);
 763         if (verbose)
 764                 fprintf(f, "\nempties:\n");
 765 #endif
 766         fixempties(nfa, f);                     /* get rid of EMPTY arcs */
 767 #ifdef REG_DEBUG
 768         if (verbose)
 769                 fprintf(f, "\nconstraints:\n");
 770 #endif
 771         pullback(nfa, f);                       /* pull back constraints backward */
 772         pushfwd(nfa, f);                        /* push fwd constraints forward */
 773 #ifdef REG_DEBUG
 774         if (verbose)
 775                 fprintf(f, "\nfinal cleanup:\n");
 776 #endif
 777         cleanup(nfa);                           /* final tidying */
 778         return analyze(nfa);            /* and analysis */
 779 }
 780
 781 /*
 782  * pullback - pull back constraints backward to (with luck) eliminate them
 783  */
 784 static void
 785 pullback(struct nfa * nfa,
 786                  FILE *f)                               /* for debug output; NULL none */
 787 {
 788         struct state *s;
 789         struct state *nexts;
 790         struct arc *a;
 791         struct arc *nexta;
 792         int                     progress;
 793
 794         /* find and pull until there are no more */
 795         do
 796         {
 797                 progress = 0;
 798                 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
 799                 {
 800                         nexts = s->next;
 801                         for (a = s->outs; a != NULL && !NISERR(); a = nexta)
 802                         {
 803                                 nexta = a->outchain;
 804                                 if (a->type == '^' || a->type == BEHIND)
 805                                         if (pull(nfa, a))
 806                                                 progress = 1;
 807                                 assert(nexta == NULL || s->no != FREESTATE);
 808                         }
 809                 }
 810                 if (progress && f != NULL)
 811                         dumpnfa(nfa, f);
 812         } while (progress && !NISERR());
 813         if (NISERR())
 814                 return;
 815
 816         for (a = nfa->pre->outs; a != NULL; a = nexta)
 817         {
 818                 nexta = a->outchain;
 819                 if (a->type == '^')
 820                 {
 821                         assert(a->co == 0 || a->co == 1);
 822                         newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
 823                         freearc(nfa, a);
 824                 }
 825         }
 826 }
 827
 828 /*
 829  * pull - pull a back constraint backward past its source state
 830  * A significant property of this function is that it deletes at most
 831  * one state -- the constraint's from state -- and only if the constraint
 832  * was that state's last outarc.
 833  */
 834 static int                                              /* 0 couldn't, 1 could */
 835 pull(struct nfa * nfa,
 836          struct arc * con)
 837 {
 838         struct state *from = con->from;
 839         struct state *to = con->to;
 840         struct arc *a;
 841         struct arc *nexta;
 842         struct state *s;
 843
 844         if (from == to)
 845         {                                                       /* circular constraint is pointless */
 846                 freearc(nfa, con);
 847                 return 1;
 848         }
 849         if (from->flag)                         /* can't pull back beyond start */
 850                 return 0;
 851         if (from->nins == 0)
 852         {                                                       /* unreachable */
 853                 freearc(nfa, con);
 854                 return 1;
 855         }
 856
 857         /*
 858          * DGP 2007-11-15: Cloning a state with a circular constraint on its list
 859          * of outs can lead to trouble [Tcl Bug 1810038], so get rid of them first.
 860          */
 861         for (a = from->outs; a != NULL; a = nexta)
 862         {
 863                 nexta = a->outchain;
 864                 switch (a->type)
 865                 {
 866                         case '^':
 867                         case '$':
 868                         case BEHIND:
 869                         case AHEAD:
 870                                 if (from == a->to)
 871                                         freearc(nfa, a);
 872                                 break;
 873                 }
 874         }
 875
 876         /* first, clone from state if necessary to avoid other outarcs */
 877         if (from->nouts > 1)
 878         {
 879                 s = newstate(nfa);
 880                 if (NISERR())
 881                         return 0;
 882                 assert(to != from);             /* con is not an inarc */
 883                 copyins(nfa, from, s);  /* duplicate inarcs */
 884                 cparc(nfa, con, s, to); /* move constraint arc */
 885                 freearc(nfa, con);
 886                 from = s;
 887                 con = from->outs;
 888         }
 889         assert(from->nouts == 1);
 890
 891         /* propagate the constraint into the from state's inarcs */
 892         for (a = from->ins; a != NULL; a = nexta)
 893         {
 894                 nexta = a->inchain;
 895                 switch (combine(con, a))
 896                 {
 897                         case INCOMPATIBLE:      /* destroy the arc */
 898                                 freearc(nfa, a);
 899                                 break;
 900                         case SATISFIED:         /* no action needed */
 901                                 break;
 902                         case COMPATIBLE:        /* swap the two arcs, more or less */
 903                                 s = newstate(nfa);
 904                                 if (NISERR())
 905                                         return 0;
 906                                 cparc(nfa, a, s, to);   /* anticipate move */
 907                                 cparc(nfa, con, a->from, s);
 908                                 if (NISERR())
 909                                         return 0;
 910                                 freearc(nfa, a);
 911                                 break;
 912                         default:
 913                                 assert(NOTREACHED);
 914                                 break;
 915                 }
 916         }
 917
 918         /* remaining inarcs, if any, incorporate the constraint */
 919         moveins(nfa, from, to);
 920         dropstate(nfa, from);           /* will free the constraint */
 921         return 1;
 922 }
 923
 924 /*
 925  * pushfwd - push forward constraints forward to (with luck) eliminate them
 926  */
 927 static void
 928 pushfwd(struct nfa * nfa,
 929                 FILE *f)                                /* for debug output; NULL none */
 930 {
 931         struct state *s;
 932         struct state *nexts;
 933         struct arc *a;
 934         struct arc *nexta;
 935         int                     progress;
 936
 937         /* find and push until there are no more */
 938         do
 939         {
 940                 progress = 0;
 941                 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
 942                 {
 943                         nexts = s->next;
 944                         for (a = s->ins; a != NULL && !NISERR(); a = nexta)
 945                         {
 946                                 nexta = a->inchain;
 947                                 if (a->type == '$' || a->type == AHEAD)
 948                                         if (push(nfa, a))
 949                                                 progress = 1;
 950                                 assert(nexta == NULL || s->no != FREESTATE);
 951                         }
 952                 }
 953                 if (progress && f != NULL)
 954                         dumpnfa(nfa, f);
 955         } while (progress && !NISERR());
 956         if (NISERR())
 957                 return;
 958
 959         for (a = nfa->post->ins; a != NULL; a = nexta)
 960         {
 961                 nexta = a->inchain;
 962                 if (a->type == '$')
 963                 {
 964                         assert(a->co == 0 || a->co == 1);
 965                         newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
 966                         freearc(nfa, a);
 967                 }
 968         }
 969 }
 970
 971 /*
 972  * push - push a forward constraint forward past its destination state
 973  * A significant property of this function is that it deletes at most
 974  * one state -- the constraint's to state -- and only if the constraint
 975  * was that state's last inarc.
 976  */
 977 static int                                              /* 0 couldn't, 1 could */
 978 push(struct nfa * nfa,
 979          struct arc * con)
 980 {
 981         struct state *from = con->from;
 982         struct state *to = con->to;
 983         struct arc *a;
 984         struct arc *nexta;
 985         struct state *s;
 986
 987         if (to == from)
 988         {                                                       /* circular constraint is pointless */
 989                 freearc(nfa, con);
 990                 return 1;
 991         }
 992         if (to->flag)                           /* can't push forward beyond end */
 993                 return 0;
 994         if (to->nouts == 0)
 995         {                                                       /* dead end */
 996                 freearc(nfa, con);
 997                 return 1;
 998         }
 999
1000         /*
1001          * DGP 2007-11-15: Here we duplicate the same protections as appear
1002          * in pull() above to avoid troubles with cloning a state with a
1003          * circular constraint on its list of ins.  It is not clear whether
1004          * this is necessary, or is protecting against a "can't happen".
1005          * Any test case that actually leads to a freearc() call here would
1006          * be a welcome addition to the test suite.
1007          */
1008         for (a = to->ins; a != NULL; a = nexta)
1009         {
1010                 nexta = a->inchain;
1011                 switch (a->type)
1012                 {
1013                         case '^':
1014                         case '$':
1015                         case BEHIND:
1016                         case AHEAD:
1017                                 if (a->from == to)
1018                                         freearc(nfa, a);
1019                                 break;
1020                 }
1021         }
1022
1023         /* first, clone to state if necessary to avoid other inarcs */
1024         if (to->nins > 1)
1025         {
1026                 s = newstate(nfa);
1027                 if (NISERR())
1028                         return 0;
1029                 copyouts(nfa, to, s);   /* duplicate outarcs */
1030                 cparc(nfa, con, from, s);               /* move constraint */
1031                 freearc(nfa, con);
1032                 to = s;
1033                 con = to->ins;
1034         }
1035         assert(to->nins == 1);
1036
1037         /* propagate the constraint into the to state's outarcs */
1038         for (a = to->outs; a != NULL; a = nexta)
1039         {
1040                 nexta = a->outchain;
1041                 switch (combine(con, a))
1042                 {
1043                         case INCOMPATIBLE:      /* destroy the arc */
1044                                 freearc(nfa, a);
1045                                 break;
1046                         case SATISFIED:         /* no action needed */
1047                                 break;
1048                         case COMPATIBLE:        /* swap the two arcs, more or less */
1049                                 s = newstate(nfa);
1050                                 if (NISERR())
1051                                         return 0;
1052                                 cparc(nfa, con, s, a->to);              /* anticipate move */
1053                                 cparc(nfa, a, from, s);
1054                                 if (NISERR())
1055                                         return 0;
1056                                 freearc(nfa, a);
1057                                 break;
1058                         default:
1059                                 assert(NOTREACHED);
1060                                 break;
1061                 }
1062         }
1063
1064         /* remaining outarcs, if any, incorporate the constraint */
1065         moveouts(nfa, to, from);
1066         dropstate(nfa, to);                     /* will free the constraint */
1067         return 1;
1068 }
1069
1070 /*
1071  * combine - constraint lands on an arc, what happens?
1072  *
1073  * #def INCOMPATIBLE    1       // destroys arc
1074  * #def SATISFIED               2       // constraint satisfied
1075  * #def COMPATIBLE              3       // compatible but not satisfied yet
1076  */
1077 static int
1078 combine(struct arc * con,
1079                 struct arc * a)
1080 {
1081 #define  CA(ct,at)       (((ct)<<CHAR_BIT) | (at))
1082
1083         switch (CA(con->type, a->type))
1084         {
1085                 case CA('^', PLAIN):    /* newlines are handled separately */
1086                 case CA('$', PLAIN):
1087                         return INCOMPATIBLE;
1088                         break;
1089                 case CA(AHEAD, PLAIN):  /* color constraints meet colors */
1090                 case CA(BEHIND, PLAIN):
1091                         if (con->co == a->co)
1092                                 return SATISFIED;
1093                         return INCOMPATIBLE;
1094                         break;
1095                 case CA('^', '^'):              /* collision, similar constraints */
1096                 case CA('$', '$'):
1097                 case CA(AHEAD, AHEAD):
1098                 case CA(BEHIND, BEHIND):
1099                         if (con->co == a->co)           /* true duplication */
1100                                 return SATISFIED;
1101                         return INCOMPATIBLE;
1102                         break;
1103                 case CA('^', BEHIND):   /* collision, dissimilar constraints */
1104                 case CA(BEHIND, '^'):
1105                 case CA('$', AHEAD):
1106                 case CA(AHEAD, '$'):
1107                         return INCOMPATIBLE;
1108                         break;
1109                 case CA('^', '$'):              /* constraints passing each other */
1110                 case CA('^', AHEAD):
1111                 case CA(BEHIND, '$'):
1112                 case CA(BEHIND, AHEAD):
1113                 case CA('$', '^'):
1114                 case CA('$', BEHIND):
1115                 case CA(AHEAD, '^'):
1116                 case CA(AHEAD, BEHIND):
1117                 case CA('^', LACON):
1118                 case CA(BEHIND, LACON):
1119                 case CA('$', LACON):
1120                 case CA(AHEAD, LACON):
1121                         return COMPATIBLE;
1122                         break;
1123         }
1124         assert(NOTREACHED);
1125         return INCOMPATIBLE;            /* for benefit of blind compilers */
1126 }
1127
1128 /*
1129  * fixempties - get rid of EMPTY arcs
1130  */
1131 static void
1132 fixempties(struct nfa * nfa,
1133                    FILE *f)                             /* for debug output; NULL none */
1134 {
1135         struct state *s;
1136         struct state *nexts;
1137         struct arc *a;
1138         struct arc *nexta;
1139         int                     progress;
1140
1141         /* find and eliminate empties until there are no more */
1142         do
1143         {
1144                 progress = 0;
1145                 for (s = nfa->states; s != NULL && !NISERR() &&
1146                                  s->no != FREESTATE; s = nexts)
1147                 {
1148                         nexts = s->next;
1149                         for (a = s->outs; a != NULL && !NISERR(); a = nexta)
1150                         {
1151                                 nexta = a->outchain;
1152                                 if (a->type == EMPTY && unempty(nfa, a))
1153                                         progress = 1;
1154                                 assert(nexta == NULL || s->no != FREESTATE);
1155                         }
1156                 }
1157                 if (progress && f != NULL)
1158                         dumpnfa(nfa, f);
1159         } while (progress && !NISERR());
1160 }
1161
1162 /*
1163  * unempty - optimize out an EMPTY arc, if possible
1164  *
1165  * Actually, as it stands this function always succeeds, but the return
1166  * value is kept with an eye on possible future changes.
1167  */
1168 static int                                              /* 0 couldn't, 1 could */
1169 unempty(struct nfa * nfa,
1170                 struct arc * a)
1171 {
1172         struct state *from = a->from;
1173         struct state *to = a->to;
1174         int                     usefrom;                /* work on from, as opposed to to? */
1175
1176         assert(a->type == EMPTY);
1177         assert(from != nfa->pre && to != nfa->post);
1178
1179         if (from == to)
1180         {                                                       /* vacuous loop */
1181                 freearc(nfa, a);
1182                 return 1;
1183         }
1184
1185         /* decide which end to work on */
1186         usefrom = 1;                            /* default:  attack from */
1187         if (from->nouts > to->nins)
1188                 usefrom = 0;
1189         else if (from->nouts == to->nins)
1190         {
1191                 /* decide on secondary issue:  move/copy fewest arcs */
1192                 if (from->nins > to->nouts)
1193                         usefrom = 0;
1194         }
1195
1196         freearc(nfa, a);
1197         if (usefrom)
1198         {
1199                 if (from->nouts == 0)
1200                 {
1201                         /* was the state's only outarc */
1202                         moveins(nfa, from, to);
1203                         freestate(nfa, from);
1204                 }
1205                 else
1206                         copyins(nfa, from, to);
1207         }
1208         else
1209         {
1210                 if (to->nins == 0)
1211                 {
1212                         /* was the state's only inarc */
1213                         moveouts(nfa, to, from);
1214                         freestate(nfa, to);
1215                 }
1216                 else
1217                         copyouts(nfa, to, from);
1218         }
1219
1220         return 1;
1221 }
1222
1223 /*
1224  * cleanup - clean up NFA after optimizations
1225  */
1226 static void
1227 cleanup(struct nfa * nfa)
1228 {
1229         struct state *s;
1230         struct state *nexts;
1231         int                     n;
1232
1233         /* clear out unreachable or dead-end states */
1234         /* use pre to mark reachable, then post to mark can-reach-post */
1235         markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
1236         markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
1237         for (s = nfa->states; s != NULL; s = nexts)
1238         {
1239                 nexts = s->next;
1240                 if (s->tmp != nfa->post && !s->flag)
1241                         dropstate(nfa, s);
1242         }
1243         assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
1244         cleartraverse(nfa, nfa->pre);
1245         assert(nfa->post->nins == 0 || nfa->post->tmp == NULL);
1246         /* the nins==0 (final unreachable) case will be caught later */
1247
1248         /* renumber surviving states */
1249         n = 0;
1250         for (s = nfa->states; s != NULL; s = s->next)
1251                 s->no = n++;
1252         nfa->nstates = n;
1253 }
1254
1255 /*
1256  * markreachable - recursive marking of reachable states
1257  */
1258 static void
1259 markreachable(struct nfa * nfa,
1260                           struct state * s,
1261                           struct state * okay,          /* consider only states with this mark */
1262                           struct state * mark)          /* the value to mark with */
1263 {
1264         struct arc *a;
1265
1266         if (s->tmp != okay)
1267                 return;
1268         s->tmp = mark;
1269
1270         for (a = s->outs; a != NULL; a = a->outchain)
1271                 markreachable(nfa, a->to, okay, mark);
1272 }
1273
1274 /*
1275  * markcanreach - recursive marking of states which can reach here
1276  */
1277 static void
1278 markcanreach(struct nfa * nfa,
1279                          struct state * s,
1280                          struct state * okay,           /* consider only states with this mark */
1281                          struct state * mark)           /* the value to mark with */
1282 {
1283         struct arc *a;
1284
1285         if (s->tmp != okay)
1286                 return;
1287         s->tmp = mark;
1288
1289         for (a = s->ins; a != NULL; a = a->inchain)
1290                 markcanreach(nfa, a->from, okay, mark);
1291 }
1292
1293 /*
1294  * analyze - ascertain potentially-useful facts about an optimized NFA
1295  */
1296 static long                                             /* re_info bits to be ORed in */
1297 analyze(struct nfa * nfa)
1298 {
1299         struct arc *a;
1300         struct arc *aa;
1301
1302         if (nfa->pre->outs == NULL)
1303                 return REG_UIMPOSSIBLE;
1304         for (a = nfa->pre->outs; a != NULL; a = a->outchain)
1305                 for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
1306                         if (aa->to == nfa->post)
1307                                 return REG_UEMPTYMATCH;
1308         return 0;
1309 }
1310
1311 /*
1312  * compact - compact an NFA
1313  */
1314 static void
1315 compact(struct nfa * nfa,
1316                 struct cnfa * cnfa)
1317 {
1318         struct state *s;
1319         struct arc *a;
1320         size_t          nstates;
1321         size_t          narcs;
1322         struct carc *ca;
1323         struct carc *first;
1324
1325         assert(!NISERR());
1326
1327         nstates = 0;
1328         narcs = 0;
1329         for (s = nfa->states; s != NULL; s = s->next)
1330         {
1331                 nstates++;
1332                 narcs += 1 + s->nouts + 1;
1333                 /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
1334         }
1335
1336         cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
1337         cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
1338         if (cnfa->states == NULL || cnfa->arcs == NULL)
1339         {
1340                 if (cnfa->states != NULL)
1341                         FREE(cnfa->states);
1342                 if (cnfa->arcs != NULL)
1343                         FREE(cnfa->arcs);
1344                 NERR(REG_ESPACE);
1345                 return;
1346         }
1347         cnfa->nstates = nstates;
1348         cnfa->pre = nfa->pre->no;
1349         cnfa->post = nfa->post->no;
1350         cnfa->bos[0] = nfa->bos[0];
1351         cnfa->bos[1] = nfa->bos[1];
1352         cnfa->eos[0] = nfa->eos[0];
1353         cnfa->eos[1] = nfa->eos[1];
1354         cnfa->ncolors = maxcolor(nfa->cm) + 1;
1355         cnfa->flags = 0;
1356
1357         ca = cnfa->arcs;
1358         for (s = nfa->states; s != NULL; s = s->next)
1359         {
1360                 assert((size_t) s->no < nstates);
1361                 cnfa->states[s->no] = ca;
1362                 ca->co = 0;                             /* clear and skip flags "arc" */
1363                 ca++;
1364                 first = ca;
1365                 for (a = s->outs; a != NULL; a = a->outchain)
1366                         switch (a->type)
1367                         {
1368                                 case PLAIN:
1369                                         ca->co = a->co;
1370                                         ca->to = a->to->no;
1371                                         ca++;
1372                                         break;
1373                                 case LACON:
1374                                         assert(s->no != cnfa->pre);
1375                                         ca->co = (color) (cnfa->ncolors + a->co);
1376                                         ca->to = a->to->no;
1377                                         ca++;
1378                                         cnfa->flags |= HASLACONS;
1379                                         break;
1380                                 default:
1381                                         assert(NOTREACHED);
1382                                         break;
1383                         }
1384                 carcsort(first, ca - 1);
1385                 ca->co = COLORLESS;
1386                 ca->to = 0;
1387                 ca++;
1388         }
1389         assert(ca == &cnfa->arcs[narcs]);
1390         assert(cnfa->nstates != 0);
1391
1392         /* mark no-progress states */
1393         for (a = nfa->pre->outs; a != NULL; a = a->outchain)
1394                 cnfa->states[a->to->no]->co = 1;
1395         cnfa->states[nfa->pre->no]->co = 1;
1396 }
1397
1398 /*
1399  * carcsort - sort compacted-NFA arcs by color
1400  *
1401  * Really dumb algorithm, but if the list is long enough for that to matter,
1402  * you're in real trouble anyway.
1403  */
1404 static void
1405 carcsort(struct carc * first,
1406                  struct carc * last)
1407 {
1408         struct carc *p;
1409         struct carc *q;
1410         struct carc tmp;
1411
1412         if (last - first <= 1)
1413                 return;
1414
1415         for (p = first; p <= last; p++)
1416                 for (q = p; q <= last; q++)
1417                         if (p->co > q->co ||
1418                                 (p->co == q->co && p->to > q->to))
1419                         {
1420                                 assert(p != q);
1421                                 tmp = *p;
1422                                 *p = *q;
1423                                 *q = tmp;
1424                         }
1425 }
1426
1427 /*
1428  * freecnfa - free a compacted NFA
1429  */
1430 static void
1431 freecnfa(struct cnfa * cnfa)
1432 {
1433         assert(cnfa->nstates != 0); /* not empty already */
1434         cnfa->nstates = 0;
1435         FREE(cnfa->states);
1436         FREE(cnfa->arcs);
1437 }
1438
1439 /*
1440  * dumpnfa - dump an NFA in human-readable form
1441  */
1442 static void
1443 dumpnfa(struct nfa * nfa,
1444                 FILE *f)
1445 {
1446 #ifdef REG_DEBUG
1447         struct state *s;
1448
1449         fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
1450         if (nfa->bos[0] != COLORLESS)
1451                 fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
1452         if (nfa->bos[1] != COLORLESS)
1453                 fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
1454         if (nfa->eos[0] != COLORLESS)
1455                 fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
1456         if (nfa->eos[1] != COLORLESS)
1457                 fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
1458         fprintf(f, "\n");
1459         for (s = nfa->states; s != NULL; s = s->next)
1460                 dumpstate(s, f);
1461         if (nfa->parent == NULL)
1462                 dumpcolors(nfa->cm, f);
1463         fflush(f);
1464 #endif
1465 }
1466
1467 #ifdef REG_DEBUG                                /* subordinates of dumpnfa */
1468
1469 /*
1470  * dumpstate - dump an NFA state in human-readable form
1471  */
1472 static void
1473 dumpstate(struct state * s,
1474                   FILE *f)
1475 {
1476         struct arc *a;
1477
1478         fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
1479                         (s->flag) ? s->flag : '.');
1480         if (s->prev != NULL && s->prev->next != s)
1481                 fprintf(f, "\tstate chain bad\n");
1482         if (s->nouts == 0)
1483                 fprintf(f, "\tno out arcs\n");
1484         else
1485                 dumparcs(s, f);
1486         fflush(f);
1487         for (a = s->ins; a != NULL; a = a->inchain)
1488         {
1489                 if (a->to != s)
1490                         fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
1491                                         a->from->no, a->to->no, s->no);
1492         }
1493 }
1494
1495 /*
1496  * dumparcs - dump out-arcs in human-readable form
1497  */
1498 static void
1499 dumparcs(struct state * s,
1500                  FILE *f)
1501 {
1502         int                     pos;
1503
1504         assert(s->nouts > 0);
1505         /* printing arcs in reverse order is usually clearer */
1506         pos = dumprarcs(s->outs, s, f, 1);
1507         if (pos != 1)
1508                 fprintf(f, "\n");
1509 }
1510
1511 /*
1512  * dumprarcs - dump remaining outarcs, recursively, in reverse order
1513  */
1514 static int                                              /* resulting print position */
1515 dumprarcs(struct arc * a,
1516                   struct state * s,
1517                   FILE *f,
1518                   int pos)                              /* initial print position */
1519 {
1520         if (a->outchain != NULL)
1521                 pos = dumprarcs(a->outchain, s, f, pos);
1522         dumparc(a, s, f);
1523         if (pos == 5)
1524         {
1525                 fprintf(f, "\n");
1526                 pos = 1;
1527         }
1528         else
1529                 pos++;
1530         return pos;
1531 }
1532
1533 /*
1534  * dumparc - dump one outarc in readable form, including prefixing tab
1535  */
1536 static void
1537 dumparc(struct arc * a,
1538                 struct state * s,
1539                 FILE *f)
1540 {
1541         struct arc *aa;
1542         struct arcbatch *ab;
1543
1544         fprintf(f, "\t");
1545         switch (a->type)
1546         {
1547                 case PLAIN:
1548                         fprintf(f, "[%ld]", (long) a->co);
1549                         break;
1550                 case AHEAD:
1551                         fprintf(f, ">%ld>", (long) a->co);
1552                         break;
1553                 case BEHIND:
1554                         fprintf(f, "<%ld<", (long) a->co);
1555                         break;
1556                 case LACON:
1557                         fprintf(f, ":%ld:", (long) a->co);
1558                         break;
1559                 case '^':
1560                 case '$':
1561                         fprintf(f, "%c%d", a->type, (int) a->co);
1562                         break;
1563                 case EMPTY:
1564                         break;
1565                 default:
1566                         fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
1567                         break;
1568         }
1569         if (a->from != s)
1570                 fprintf(f, "?%d?", a->from->no);
1571         for (ab = &a->from->oas; ab != NULL; ab = ab->next)
1572         {
1573                 for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
1574                         if (aa == a)
1575                                 break;                  /* NOTE BREAK OUT */
1576                 if (aa < &ab->a[ABSIZE])        /* propagate break */
1577                         break;                          /* NOTE BREAK OUT */
1578         }
1579         if (ab == NULL)
1580                 fprintf(f, "?!?");              /* not in allocated space */
1581         fprintf(f, "->");
1582         if (a->to == NULL)
1583         {
1584                 fprintf(f, "NULL");
1585                 return;
1586         }
1587         fprintf(f, "%d", a->to->no);
1588         for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
1589                 if (aa == a)
1590                         break;                          /* NOTE BREAK OUT */
1591         if (aa == NULL)
1592                 fprintf(f, "?!?");              /* missing from in-chain */
1593 }
1594 #endif   /* REG_DEBUG */
1595
1596 /*
1597  * dumpcnfa - dump a compacted NFA in human-readable form
1598  */
1599 #ifdef REG_DEBUG
1600 static void
1601 dumpcnfa(struct cnfa * cnfa,
1602                  FILE *f)
1603 {
1604         int                     st;
1605
1606         fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
1607         if (cnfa->bos[0] != COLORLESS)
1608                 fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
1609         if (cnfa->bos[1] != COLORLESS)
1610                 fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
1611         if (cnfa->eos[0] != COLORLESS)
1612                 fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
1613         if (cnfa->eos[1] != COLORLESS)
1614                 fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
1615         if (cnfa->flags & HASLACONS)
1616                 fprintf(f, ", haslacons");
1617         fprintf(f, "\n");
1618         for (st = 0; st < cnfa->nstates; st++)
1619                 dumpcstate(st, cnfa->states[st], cnfa, f);
1620         fflush(f);
1621 }
1622 #endif
1623
1624 #ifdef REG_DEBUG                                /* subordinates of dumpcnfa */
1625
1626 /*
1627  * dumpcstate - dump a compacted-NFA state in human-readable form
1628  */
1629 static void
1630 dumpcstate(int st,
1631                    struct carc * ca,
1632                    struct cnfa * cnfa,
1633                    FILE *f)
1634 {
1635         int                     i;
1636         int                     pos;
1637
1638         fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
1639         pos = 1;
1640         for (i = 1; ca[i].co != COLORLESS; i++)
1641         {
1642                 if (ca[i].co < cnfa->ncolors)
1643                         fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
1644                 else
1645                         fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors,
1646                                         ca[i].to);
1647                 if (pos == 5)
1648                 {
1649                         fprintf(f, "\n");
1650                         pos = 1;
1651                 }
1652                 else
1653                         pos++;
1654         }
1655         if (i == 1 || pos != 1)
1656                 fprintf(f, "\n");
1657         fflush(f);
1658 }
1659
1660 #endif   /* REG_DEBUG */